From 9f9d98cfd1573bd07114656e06352a0ee0a3d2ad Mon Sep 17 00:00:00 2001 From: Yuzhong Luo Date: Sun, 29 Mar 2026 15:21:13 +0100 Subject: [PATCH 1/2] Fix Python 3.12 compatibility in DT Type_EB table_generator - Replace deprecated plt.style.use('seaborn') with 'seaborn-v0_8' (matplotlib 3.6+ breaking change) - Replace .max()[0] with .max().iloc[0] for pandas 2.0+ compatibility --- src/models/DT/Type_EB/table_generator.py | 1010 +++++++++++----------- 1 file changed, 505 insertions(+), 505 deletions(-) diff --git a/src/models/DT/Type_EB/table_generator.py b/src/models/DT/Type_EB/table_generator.py index 6db346a..57ca364 100755 --- a/src/models/DT/Type_EB/table_generator.py +++ b/src/models/DT/Type_EB/table_generator.py @@ -1,505 +1,505 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -import time -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l == 0: - text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - else: - text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - Ternary_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) - - - # prepare default - collect_votes = [] - Ternary_Table['code to vote'] = {} - for idx in Exact_Table['code to vote']: - collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] - code_table_size = 0 - default_label = max(collect_votes , key = collect_votes.count) - for idx in Exact_Table['code to vote']: - if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: - Ternary_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] - code_table_size += 1 - Exact_Table['code to vote'] = copy.deepcopy(Ternary_Table['code to vote']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - - Planter_config['p4 config']["code table size"] = code_table_size - Planter_config['p4 config']["default lable"] = default_label - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] - if not match_or_not: - print('feature table not matched') - if str(code_list) != str(ternary_code_list): - print('error in exact to ternary match', code_list, ternary_code_list) - - - for key in Exact_Table['code to vote']: - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) - match_or_not = True - break - if not match_or_not: - - switch_prediction = Planter_config['p4 config']["default lable"] - - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"]) ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +import time +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l == 0: + text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + else: + text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + Ternary_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) + + + # prepare default + collect_votes = [] + Ternary_Table['code to vote'] = {} + for idx in Exact_Table['code to vote']: + collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] + code_table_size = 0 + default_label = max(collect_votes , key = collect_votes.count) + for idx in Exact_Table['code to vote']: + if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: + Ternary_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] + code_table_size += 1 + Exact_Table['code to vote'] = copy.deepcopy(Ternary_Table['code to vote']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + + Planter_config['p4 config']["code table size"] = code_table_size + Planter_config['p4 config']["default lable"] = default_label + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] + if not match_or_not: + print('feature table not matched') + if str(code_list) != str(ternary_code_list): + print('error in exact to ternary match', code_list, ternary_code_list) + + + for key in Exact_Table['code to vote']: + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) + match_or_not = True + break + if not match_or_not: + + switch_prediction = Planter_config['p4 config']["default lable"] + + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"]) ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + From ef4b6411df20d1e6a1318fdaf1ef61822cd083e3 Mon Sep 17 00:00:00 2001 From: Yuzhong Luo Date: Sun, 29 Mar 2026 16:41:14 +0100 Subject: [PATCH 2/2] Fix Python 3.12 compatibility across all model table_generators - Replace plt.style.use('seaborn') with 'seaborn-v0_8' in all models (matplotlib 3.6+ removed the seaborn style alias) - Replace .max()[0] with .max().iloc[0] in all models (pandas 2.0+ no longer supports integer indexing on named Series) Fixes affect 15+ model types including DT, RF, XGB, SVM, Bayes, KM, KNN, NN, IF, PCA, Autoencoder across all type variations. Tested on: Python 3.12.3, matplotlib 3.x, pandas 2.4.3, Ubuntu 24.04 --- src/models/Autoencoder/Type_1/dedicated_p4.py | 570 ++++---- src/models/Autoencoder/Type_1/readme.md | 2 +- .../Autoencoder/Type_1/table_generator.py | 568 ++++---- .../Autoencoder/Type_LB/dedicated_p4.py | 596 ++++---- src/models/Autoencoder/Type_LB/readme.md | 2 +- .../Autoencoder/Type_LB/table_generator.py | 578 ++++---- src/models/Autoencoder/readme.md | 2 +- src/models/Bayes/Type_1/dedicated_p4.py | 666 ++++----- src/models/Bayes/Type_1/readme.md | 2 +- src/models/Bayes/Type_1/table_generator.py | 550 ++++---- src/models/Bayes/Type_2/dedicated_p4.py | 700 +++++----- src/models/Bayes/Type_2/readme.md | 2 +- src/models/Bayes/Type_2/table_generator.py | 532 +++---- src/models/Bayes/Type_3/dedicated_p4.py | 720 +++++----- src/models/Bayes/Type_3/readme.md | 2 +- src/models/Bayes/Type_3/table_generator.py | 512 +++---- src/models/Bayes/Type_LB/dedicated_p4.py | 670 ++++----- src/models/Bayes/Type_LB/readme.md | 2 +- src/models/Bayes/Type_LB/table_generator.py | 546 ++++---- .../Bayes/Type_LB_Bernoulli/dedicated_p4.py | 668 ++++----- src/models/Bayes/Type_LB_Bernoulli/readme.md | 2 +- .../Type_LB_Bernoulli/table_generator.py | 600 ++++---- src/models/Bayes/readme.md | 2 +- src/models/DT/Type_1/dedicated_p4.py | 530 +++---- src/models/DT/Type_1/readme.md | 2 +- src/models/DT/Type_1/table_generator.py | 912 ++++++------ src/models/DT/Type_1_xsa/dedicated_p4.py | 688 ++++----- src/models/DT/Type_1_xsa/readme.md | 2 +- src/models/DT/Type_1_xsa/table_generator.py | 908 ++++++------ src/models/DT/Type_2/dedicated_p4.py | 552 ++++---- src/models/DT/Type_2/readme.md | 2 +- src/models/DT/Type_2/table_generator.py | 934 ++++++------- src/models/DT/Type_3/dedicated_p4.py | 616 ++++----- src/models/DT/Type_3/readme.md | 2 +- src/models/DT/Type_3/table_generator.py | 1008 +++++++------- src/models/DT/Type_4/dedicated_p4.py | 618 ++++----- src/models/DT/Type_4/readme.md | 2 +- src/models/DT/Type_4/table_generator.py | 1012 +++++++------- src/models/DT/Type_5/dedicated_p4.py | 504 +++---- src/models/DT/Type_5/readme.md | 2 +- src/models/DT/Type_5/table_generator.py | 998 +++++++------- src/models/DT/Type_DM/dedicated_p4.py | 550 ++++---- src/models/DT/Type_DM/readme.md | 2 +- src/models/DT/Type_DM/table_generator.py | 796 +++++------ src/models/DT/Type_EB/dedicated_p4.py | 598 ++++---- src/models/DT/Type_EB/readme.md | 2 +- .../dedicated_p4.py | 550 ++++---- .../DT/Type_depth_based_bmv2_only/readme.md | 2 +- .../table_generator.py | 786 +++++------ src/models/DT/readme.md | 2 +- src/models/IF/Type_1/dedicated_p4.py | 634 ++++----- src/models/IF/Type_1/readme.md | 2 +- src/models/IF/Type_1/table_generator.py | 1224 ++++++++-------- src/models/IF/Type_2/dedicated_p4.py | 634 ++++----- src/models/IF/Type_2/readme.md | 2 +- src/models/IF/Type_2/table_generator.py | 1108 +++++++-------- src/models/IF/Type_EB/dedicated_p4.py | 634 ++++----- src/models/IF/Type_EB/readme.md | 2 +- src/models/IF/Type_EB/table_generator.py | 1226 ++++++++--------- .../IF/Type_Simplified_EB/dedicated_p4.py | 634 ++++----- src/models/IF/Type_Simplified_EB/readme.md | 2 +- .../IF/Type_Simplified_EB/table_generator.py | 1116 +++++++-------- src/models/IF/readme.md | 2 +- src/models/KM/Type_1/dedicated_p4.py | 600 ++++---- src/models/KM/Type_1/readme.md | 2 +- src/models/KM/Type_1/table_generator.py | 434 +++--- src/models/KM/Type_EB/dedicated_p4.py | 358 ++--- src/models/KM/Type_EB/readme.md | 2 +- src/models/KM/Type_EB/table_generator.py | 734 +++++----- src/models/KM/Type_LB/dedicated_p4.py | 616 ++++----- src/models/KM/Type_LB/readme.md | 2 +- src/models/KM/Type_LB/table_generator.py | 436 +++--- src/models/KM/Type_clustreams/dedicated_p4.py | 350 ++--- src/models/KM/Type_clustreams/readme.md | 2 +- .../KM/Type_clustreams/table_generator.py | 732 +++++----- src/models/KM/readme.md | 2 +- src/models/KNN/Type_1/dedicated_p4.py | 352 ++--- src/models/KNN/Type_1/readme.md | 2 +- src/models/KNN/Type_1/table_generator.py | 684 ++++----- src/models/KNN/Type_EB/dedicated_p4.py | 358 ++--- src/models/KNN/Type_EB/readme.md | 2 +- src/models/KNN/Type_EB/table_generator.py | 688 ++++----- src/models/KNN/readme.md | 2 +- src/models/NN/Type_1/BinaryNet/README.md | 104 +- .../BinaryNet/classifiers/bnn_classifier.py | 224 +-- .../classifiers/dorefa_classifier.py | 218 +-- .../BinaryNet/classifiers/xnor_classifier.py | 256 ++-- src/models/NN/Type_1/BinaryNet/config.py | 338 ++--- .../Type_1/BinaryNet/dataloader/__init__.py | 4 +- .../NN/Type_1/BinaryNet/dataloader/cifar10.py | 78 +- .../NN/Type_1/BinaryNet/dataloader/mnist.py | 76 +- src/models/NN/Type_1/BinaryNet/main.py | 98 +- .../NN/Type_1/BinaryNet/models/__init__.py | 10 +- .../Type_1/BinaryNet/models/bnn_caffenet.py | 122 +- .../NN/Type_1/BinaryNet/models/bnn_layers.py | 124 +- .../Type_1/BinaryNet/models/dorefa_layers.py | 220 +-- .../Type_1/BinaryNet/models/dorefa_resnet.py | 308 ++--- .../NN/Type_1/BinaryNet/models/xnor_layers.py | 290 ++-- .../NN/Type_1/BinaryNet/models/xnor_lenet.py | 90 +- .../NN/Type_1/BinaryNet/models/xnor_mlp.py | 88 +- .../NN/Type_1/BinaryNet/models/xnor_nin.py | 110 +- .../NN/Type_1/BinaryNet/requirements.txt | 10 +- .../BinaryNet/yml/bnn_caffenet_cifar10.yml | 34 +- .../BinaryNet/yml/dorefa_resnet_cifar10.yml | 36 +- .../NN/Type_1/BinaryNet/yml/lenet_mnist.yml | 32 +- .../NN/Type_1/BinaryNet/yml/mlp_mnist.yml | 32 +- .../NN/Type_1/BinaryNet/yml/nin_cifar10.yml | 32 +- src/models/NN/Type_1/dedicated_p4.py | 610 ++++---- src/models/NN/Type_1/readme.md | 2 +- src/models/NN/Type_1/table_generator.py | 608 ++++---- src/models/NN/Type_2/BinaryNet/README.md | 104 +- .../BinaryNet/classifiers/bnn_classifier.py | 224 +-- .../classifiers/dorefa_classifier.py | 218 +-- .../BinaryNet/classifiers/xnor_classifier.py | 256 ++-- src/models/NN/Type_2/BinaryNet/config.py | 338 ++--- .../Type_2/BinaryNet/dataloader/__init__.py | 4 +- .../NN/Type_2/BinaryNet/dataloader/cifar10.py | 78 +- .../NN/Type_2/BinaryNet/dataloader/mnist.py | 76 +- src/models/NN/Type_2/BinaryNet/main.py | 98 +- .../NN/Type_2/BinaryNet/models/__init__.py | 10 +- .../Type_2/BinaryNet/models/bnn_caffenet.py | 122 +- .../NN/Type_2/BinaryNet/models/bnn_layers.py | 124 +- .../Type_2/BinaryNet/models/dorefa_layers.py | 220 +-- .../Type_2/BinaryNet/models/dorefa_resnet.py | 308 ++--- .../NN/Type_2/BinaryNet/models/xnor_layers.py | 290 ++-- .../NN/Type_2/BinaryNet/models/xnor_lenet.py | 90 +- .../NN/Type_2/BinaryNet/models/xnor_mlp.py | 88 +- .../NN/Type_2/BinaryNet/models/xnor_nin.py | 110 +- .../NN/Type_2/BinaryNet/requirements.txt | 10 +- .../BinaryNet/yml/bnn_caffenet_cifar10.yml | 34 +- .../BinaryNet/yml/dorefa_resnet_cifar10.yml | 36 +- .../NN/Type_2/BinaryNet/yml/lenet_mnist.yml | 32 +- .../NN/Type_2/BinaryNet/yml/mlp_mnist.yml | 32 +- .../NN/Type_2/BinaryNet/yml/nin_cifar10.yml | 32 +- src/models/NN/Type_2/dedicated_p4.py | 624 ++++----- src/models/NN/Type_2/readme.md | 2 +- src/models/NN/Type_2/table_generator.py | 718 +++++----- src/models/NN/Type_DM/BinaryNet/README.md | 104 +- .../BinaryNet/classifiers/bnn_classifier.py | 224 +-- .../classifiers/dorefa_classifier.py | 218 +-- .../BinaryNet/classifiers/xnor_classifier.py | 256 ++-- src/models/NN/Type_DM/BinaryNet/config.py | 338 ++--- .../Type_DM/BinaryNet/dataloader/__init__.py | 4 +- .../Type_DM/BinaryNet/dataloader/cifar10.py | 78 +- .../NN/Type_DM/BinaryNet/dataloader/mnist.py | 76 +- src/models/NN/Type_DM/BinaryNet/main.py | 98 +- .../NN/Type_DM/BinaryNet/models/__init__.py | 10 +- .../Type_DM/BinaryNet/models/bnn_caffenet.py | 122 +- .../NN/Type_DM/BinaryNet/models/bnn_layers.py | 124 +- .../Type_DM/BinaryNet/models/dorefa_layers.py | 220 +-- .../Type_DM/BinaryNet/models/dorefa_resnet.py | 308 ++--- .../Type_DM/BinaryNet/models/xnor_layers.py | 290 ++-- .../NN/Type_DM/BinaryNet/models/xnor_lenet.py | 90 +- .../NN/Type_DM/BinaryNet/models/xnor_mlp.py | 88 +- .../NN/Type_DM/BinaryNet/models/xnor_nin.py | 110 +- .../NN/Type_DM/BinaryNet/requirements.txt | 10 +- .../BinaryNet/yml/bnn_caffenet_cifar10.yml | 34 +- .../BinaryNet/yml/dorefa_resnet_cifar10.yml | 36 +- .../NN/Type_DM/BinaryNet/yml/lenet_mnist.yml | 32 +- .../NN/Type_DM/BinaryNet/yml/mlp_mnist.yml | 32 +- .../NN/Type_DM/BinaryNet/yml/nin_cifar10.yml | 32 +- src/models/NN/Type_DM/dedicated_p4.py | 632 ++++----- src/models/NN/Type_DM/readme.md | 2 +- src/models/NN/Type_DM/table_generator.py | 602 ++++---- src/models/NN/readme.md | 2 +- src/models/PCA/Type_1/dedicated_p4.py | 474 +++---- src/models/PCA/Type_1/readme.md | 2 +- src/models/PCA/Type_1/table_generator.py | 388 +++--- src/models/PCA/Type_LB/dedicated_p4.py | 482 +++---- src/models/PCA/Type_LB/readme.md | 2 +- src/models/PCA/Type_LB/table_generator.py | 388 +++--- src/models/PCA/readme.md | 2 +- src/models/RF/Type_1/dedicated_p4.py | 702 +++++----- src/models/RF/Type_1/readme.md | 2 +- src/models/RF/Type_1/table_generator.py | 1068 +++++++------- src/models/RF/Type_1_xsa/dedicated_p4.py | 880 ++++++------ src/models/RF/Type_1_xsa/readme.md | 2 +- src/models/RF/Type_1_xsa/table_generator.py | 1056 +++++++------- src/models/RF/Type_2/dedicated_p4.py | 736 +++++----- src/models/RF/Type_2/readme.md | 2 +- src/models/RF/Type_2/table_generator.py | 1106 +++++++-------- src/models/RF/Type_3/dedicated_p4.py | 646 ++++----- src/models/RF/Type_3/readme.md | 2 +- src/models/RF/Type_3/table_generator.py | 1158 ++++++++-------- src/models/RF/Type_4/dedicated_p4.py | 634 ++++----- src/models/RF/Type_4/readme.md | 2 +- src/models/RF/Type_4/table_generator.py | 1170 ++++++++-------- src/models/RF/Type_5/dedicated_p4.py | 634 ++++----- src/models/RF/Type_5/readme.md | 2 +- src/models/RF/Type_5/table_generator.py | 1194 ++++++++-------- src/models/RF/Type_DM/dedicated_p4.py | 526 +++---- src/models/RF/Type_DM/readme.md | 2 +- src/models/RF/Type_DM/table_generator.py | 758 +++++----- .../RF/Type_DM_bmv2_only/dedicated_p4.py | 546 ++++---- src/models/RF/Type_DM_bmv2_only/readme.md | 2 +- .../RF/Type_DM_bmv2_only/table_generator.py | 788 +++++------ src/models/RF/Type_EB/dedicated_p4.py | 622 ++++----- src/models/RF/Type_EB/readme.md | 2 +- src/models/RF/Type_EB/table_generator.py | 1172 ++++++++-------- src/models/RF/Type_EB_auto/dedicated_p4.py | 646 ++++----- src/models/RF/Type_EB_auto/readme.md | 2 +- src/models/RF/Type_EB_auto/table_generator.py | 1200 ++++++++-------- .../RF/Type_depth_based/dedicated_p4.py | 524 +++---- src/models/RF/Type_depth_based/readme.md | 2 +- .../RF/Type_depth_based/table_generator.py | 758 +++++----- .../dedicated_p4.py | 546 ++++---- .../RF/Type_depth_based_bmv2_only/readme.md | 2 +- .../table_generator.py | 792 +++++------ src/models/RF/readme.md | 2 +- src/models/SVM/Type_1/dedicated_p4.py | 702 +++++----- src/models/SVM/Type_1/readme.md | 2 +- src/models/SVM/Type_1/table_generator.py | 644 ++++----- src/models/SVM/Type_LB/dedicated_p4.py | 698 +++++----- src/models/SVM/Type_LB/readme.md | 2 +- src/models/SVM/Type_LB/table_generator.py | 644 ++++----- src/models/SVM/readme.md | 2 +- src/models/XGB/Type_1/dedicated_p4.py | 624 ++++----- src/models/XGB/Type_1/readme.md | 2 +- src/models/XGB/Type_1/table_generator.py | 1102 +++++++-------- src/models/XGB/Type_2/dedicated_p4.py | 638 ++++----- src/models/XGB/Type_2/readme.md | 2 +- src/models/XGB/Type_2/table_generator.py | 1102 +++++++-------- src/models/XGB/Type_2_xsa/dedicated_p4.py | 854 ++++++------ src/models/XGB/Type_2_xsa/readme.md | 2 +- src/models/XGB/Type_2_xsa/table_generator.py | 1112 +++++++-------- src/models/XGB/Type_3/dedicated_p4.py | 638 ++++----- src/models/XGB/Type_3/readme.md | 2 +- src/models/XGB/Type_3/table_generator.py | 1140 +++++++-------- src/models/XGB/Type_EB/dedicated_p4.py | 628 ++++----- src/models/XGB/Type_EB/readme.md | 2 +- src/models/XGB/Type_EB/table_generator.py | 1118 +++++++-------- src/models/XGB/Type_EB_auto/dedicated_p4.py | 628 ++++----- src/models/XGB/Type_EB_auto/readme.md | 2 +- .../XGB/Type_EB_auto/table_generator.py | 1166 ++++++++-------- src/models/XGB/readme.md | 2 +- 235 files changed, 41170 insertions(+), 41170 deletions(-) diff --git a/src/models/Autoencoder/Type_1/dedicated_p4.py b/src/models/Autoencoder/Type_1/dedicated_p4.py index b2b4bd0..5554bbd 100755 --- a/src/models/Autoencoder/Type_1/dedicated_p4.py +++ b/src/models/Autoencoder/Type_1/dedicated_p4.py @@ -1,285 +1,285 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['num_axis'] = Planter_config['p4 config']["num components"] - config['model_type'] = Planter_config['test config']['type of test'] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - for ax in range(0, config['num_axis']): - headers.write(" bit<32> middle_ax" + str(ax) + ";\n") - - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for ax in range(0, config['num_axis']): - if ax==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) - ingress.write("){\n") - - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_bias(") - for ax in range(0, config['num_axis']): - if ax==0: - ingress.write("bit<" + str(config['num_bits']) + "> bias_ax" + str(ax)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> bias_ax"+str(ax)) - - ingress.write("){\n") - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+ " = (bit<32>)bias_ax"+str(ax) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table bias {\n" - " key = {meta.flag:exact;}\n" - " actions={read_bias; NoAction;}\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action wrap_back( ){\n") - for ax in range(0, config['num_axis']): - ingress.write(" meta.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") - ingress.write(" }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" bias.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" wrap_back();\n") - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature" + str(f) - Entry["match"] = {} - Entry["match"]["meta.feature" + str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) - Entry["action_params"] = {} - for ax in range(num_components): - Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.bias" - Entry["match"] = {} - Entry["match"]["meta.flag"] = 1 - Entry["action_name"] = "SwitchIngress.read_bias" - Entry["action_params"] = {} - for ax in range(num_components): - Entry["action_params"]["bias_ax" + str(ax)] = np.int(Exact_Table["bias"]['ax' + str(ax)]) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_components = config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) - for ax in range(num_components): - label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] - file.write( str(label) + " ") - file.write("\n") - file.write("\n") - - - - file.write("table_add SwitchIngress.bias read_bias 1 => " ) - - for ax in range(num_components): - file.write(str(label) + " ") - label = np.int(Exact_Table["bias"]['ax' + str(ax)]) - file.write("\n") - file.write("\n") - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for ax in range(0, config['num_axis']): - if ax==0: - tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.bias.add_with_read_bias(" - "1, ") - for ax in range(0, config['num_axis']): - if ax == 0: - tload.write("table['bias']['ax" + str(ax) + "']") - else: - tload.write(", table['bias']['ax" + str(ax) + "']") - - tload.write(")\n\n") - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['num_axis'] = Planter_config['p4 config']["num components"] + config['model_type'] = Planter_config['test config']['type of test'] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + for ax in range(0, config['num_axis']): + headers.write(" bit<32> middle_ax" + str(ax) + ";\n") + + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for ax in range(0, config['num_axis']): + if ax==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) + ingress.write("){\n") + + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_bias(") + for ax in range(0, config['num_axis']): + if ax==0: + ingress.write("bit<" + str(config['num_bits']) + "> bias_ax" + str(ax)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> bias_ax"+str(ax)) + + ingress.write("){\n") + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+ " = (bit<32>)bias_ax"+str(ax) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table bias {\n" + " key = {meta.flag:exact;}\n" + " actions={read_bias; NoAction;}\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action wrap_back( ){\n") + for ax in range(0, config['num_axis']): + ingress.write(" meta.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") + ingress.write(" }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" bias.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" wrap_back();\n") + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature" + str(f) + Entry["match"] = {} + Entry["match"]["meta.feature" + str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) + Entry["action_params"] = {} + for ax in range(num_components): + Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.bias" + Entry["match"] = {} + Entry["match"]["meta.flag"] = 1 + Entry["action_name"] = "SwitchIngress.read_bias" + Entry["action_params"] = {} + for ax in range(num_components): + Entry["action_params"]["bias_ax" + str(ax)] = np.int(Exact_Table["bias"]['ax' + str(ax)]) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_components = config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) + for ax in range(num_components): + label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] + file.write( str(label) + " ") + file.write("\n") + file.write("\n") + + + + file.write("table_add SwitchIngress.bias read_bias 1 => " ) + + for ax in range(num_components): + file.write(str(label) + " ") + label = np.int(Exact_Table["bias"]['ax' + str(ax)]) + file.write("\n") + file.write("\n") + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for ax in range(0, config['num_axis']): + if ax==0: + tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.bias.add_with_read_bias(" + "1, ") + for ax in range(0, config['num_axis']): + if ax == 0: + tload.write("table['bias']['ax" + str(ax) + "']") + else: + tload.write(", table['bias']['ax" + str(ax) + "']") + + tload.write(")\n\n") + + diff --git a/src/models/Autoencoder/Type_1/readme.md b/src/models/Autoencoder/Type_1/readme.md index 7e3a873..52873d0 100644 --- a/src/models/Autoencoder/Type_1/readme.md +++ b/src/models/Autoencoder/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Autoencoder. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Autoencoder. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Autoencoder/Type_1/table_generator.py b/src/models/Autoencoder/Type_1/table_generator.py index 749da07..e87796c 100755 --- a/src/models/Autoencoder/Type_1/table_generator.py +++ b/src/models/Autoencoder/Type_1/table_generator.py @@ -1,284 +1,284 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import torch.nn as nn -from torch.autograd import Variable as V -import torch -from torch.utils.data import DataLoader, Dataset, TensorDataset -from sklearn.datasets import load_iris -from sklearn.preprocessing import MinMaxScaler -import numpy -import matplotlib.pyplot as plt -from sklearn.ensemble import IsolationForest -import numpy as np -import json -import copy -from scipy.stats import pearsonr -from src.functions.json_encoder import * -import math -import time -import time -# print(isinstance(my_dataset,Dataset)) - - -###### Define an autoencoder model -class autoencoder(nn.Module): - def __init__(self, num_features, num_components): - super(autoencoder, self).__init__() - self.encoder = nn.Sequential( - nn.Linear(num_features, num_components), - ) - self.decoder = nn.Sequential( - nn.Linear(num_components, num_features), - ) - - def forward(self, x): - encoder = self.encoder(x) - decoder = self.decoder(encoder) - return encoder, decoder - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - Planter_config['model config']['num components'] = np.int(input('- Number of components? (default = 2) ') or '2') - Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') - Planter_config['model config']['batch size'] = np.int(input('- Model batch size? (default = 100) ') or '100') - Planter_config['model config']['num epoch'] = np.int(input('- Number of training epoch? (default = 50) ') or '50') - Planter_config['model config']['number of bits'] = np.int( - input('- Number of bits for each action data? (default = 16) ') or '16') - - num_bits = Planter_config['model config']['number of bits'] - - num_components = Planter_config['model config']['num components'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - learning_rate = Planter_config['model config']['learning rate'] - batch_size = Planter_config['model config']['batch size'] - num_epoch = Planter_config['model config']['num epoch'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [max(t_t)+1] - ###### Normalize the input as the autoencoder only uses the input - # MMScaler = MinMaxScaler() - # x = MMScaler.fit_transform(x) - # iforestX = x - - ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 - tensor_x = torch.from_numpy(train_X.to_numpy().astype(numpy.float32)) - tensor_y = torch.from_numpy(train_y.astype(numpy.float32)) - # X_new = copy.deepcopy(test_X) - sklearn_X_new = copy.deepcopy(test_X) - test_X = torch.from_numpy(test_X.to_numpy().astype(numpy.float32)) - test_y = torch.from_numpy(test_y.astype(numpy.float32)) - my_dataset = TensorDataset(tensor_x, tensor_y) - my_test_dataset = TensorDataset(test_X, test_y) - my_dataset_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=False) - - - model = autoencoder(num_features,num_components) - - ####### Define the loss function - - criterion = nn.MSELoss() - - ####### Define the optimization function - optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # If using SGD, convergence does not decrease - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - ####### Set epoch to 300 - - for epoch in range(num_epoch): - total_loss = 0 - for i, (x, y) in enumerate(my_dataset_loader): - _, pred = model(V(x)) - loss = criterion(pred, x) - - - optimizer.zero_grad() - loss.backward() - optimizer.step() - total_loss += loss - if epoch % 10 == 0: - print('\rTraining loss {}'.format(total_loss.data.numpy()),end=" ") - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - model_info = {} - for i, param in enumerate (model.parameters()): - # print(param) - model_info[i] = param.detach().numpy() - model_info['weights'] = model_info[0].T - model_info['bias'] = model_info[1] - ###### Perform dimensionality reduction and visualization based on the trained model - - print('\nGenerate the table...',end="") - - value_info = {} - value_info["max"] = np.max(model_info['bias']) - value_info["min"] = np.min(model_info['bias']) - for ax in range(num_components): - value_info["ax " + str(ax)] = {} - value_info["ax " + str(ax)]["max"] = model_info['bias'][ax] - value_info["ax " + str(ax)]["min"] = model_info['bias'][ax] - - g_table = {} - for f in range(num_features): - g_table['feature ' + str(f)] = {} - for input_value in range(feature_max[f]): - g_table['feature ' + str(f)][input_value] = {} - for ax in range(num_components): - middle_value = copy.deepcopy(input_value * model_info['weights'][f, ax]) - g_table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value - if middle_value > value_info["ax " + str(ax)]["max"]: - value_info["ax " + str(ax)]["max"] = middle_value - if middle_value < value_info["ax " + str(ax)]["min"]: - value_info["ax " + str(ax)]["min"] = middle_value - if middle_value > value_info["max"]: - value_info["max"] = middle_value - if middle_value < value_info["min"]: - value_info["min"] = middle_value - - scale = (2 ** num_bits) / ((value_info["max"] - value_info["min"]) * (num_features+1)) - - Exact_Table = {} - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for input_value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][input_value] = {} - for ax in range(num_components): - middle_value = copy.deepcopy(g_table['feature ' + str(f)][input_value]['ax' + str(ax)]) - middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) - Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value - - Exact_Table['bias'] = {} - for ax in range(num_components): - Exact_Table['bias']['ax' + str(ax)] = np.int(np.floor((model_info['bias'][ax]- value_info["min"])*scale)) - - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - print('Done') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Autoencoder" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']["num components"] = num_components - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'dimension_reduction' - - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - x_ = [] - y_ = [] - for i, (x, y) in enumerate(my_test_dataset): - _, pred = model(V(x)) - dimension = _.data.numpy() - for ax in range(num_components): - sklearn_X_new.values[i,ax] = dimension[ax] - (num_features + 1) * value_info["min"] - # prepare for plot - x_.append(dimension[0]-(num_features+1)*value_info["min"]) - y_.append(dimension[1]-(num_features+1)*value_info["min"]) - - - plot_result = input('- Plot the training result ? (default = n) ') or 'n' - - if plot_result == 'y': - print('plot') - plt.scatter(numpy.array(x_), numpy.array(y_), c=test_y.detach().numpy()) - - for i in range(len(numpy.array(x_))): - plt.annotate(i, (x_[i], y_[i])) - plt.show() - return sklearn_X_new.values - - -def test_tables(sklearn_test_x, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_x = copy.deepcopy(sklearn_test_x) - - - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - for ax in range(num_components): - switch_test_x[i][ax] = copy.deepcopy(Exact_Table['bias']['ax'+str(ax)]) - for f in range(num_features): - ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] - for ax in range(num_components): - switch_test_x[i][ax] += ax_middle["ax"+str(ax)] - # print(sklearn_test_x[i], switch_test_x[i]) - # test_X.values[i] - # switch_test_x.values[i] - for ax in range(num_components): - corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) - print('Pearsons correlation of M/A result and output of Pytorch for axis '+str(ax)+' is: %.4f' % corr) - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import torch.nn as nn +from torch.autograd import Variable as V +import torch +from torch.utils.data import DataLoader, Dataset, TensorDataset +from sklearn.datasets import load_iris +from sklearn.preprocessing import MinMaxScaler +import numpy +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +import numpy as np +import json +import copy +from scipy.stats import pearsonr +from src.functions.json_encoder import * +import math +import time +import time +# print(isinstance(my_dataset,Dataset)) + + +###### Define an autoencoder model +class autoencoder(nn.Module): + def __init__(self, num_features, num_components): + super(autoencoder, self).__init__() + self.encoder = nn.Sequential( + nn.Linear(num_features, num_components), + ) + self.decoder = nn.Sequential( + nn.Linear(num_components, num_features), + ) + + def forward(self, x): + encoder = self.encoder(x) + decoder = self.decoder(encoder) + return encoder, decoder + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + Planter_config['model config']['num components'] = np.int(input('- Number of components? (default = 2) ') or '2') + Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') + Planter_config['model config']['batch size'] = np.int(input('- Model batch size? (default = 100) ') or '100') + Planter_config['model config']['num epoch'] = np.int(input('- Number of training epoch? (default = 50) ') or '50') + Planter_config['model config']['number of bits'] = np.int( + input('- Number of bits for each action data? (default = 16) ') or '16') + + num_bits = Planter_config['model config']['number of bits'] + + num_components = Planter_config['model config']['num components'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + learning_rate = Planter_config['model config']['learning rate'] + batch_size = Planter_config['model config']['batch size'] + num_epoch = Planter_config['model config']['num epoch'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [max(t_t)+1] + ###### Normalize the input as the autoencoder only uses the input + # MMScaler = MinMaxScaler() + # x = MMScaler.fit_transform(x) + # iforestX = x + + ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 + tensor_x = torch.from_numpy(train_X.to_numpy().astype(numpy.float32)) + tensor_y = torch.from_numpy(train_y.astype(numpy.float32)) + # X_new = copy.deepcopy(test_X) + sklearn_X_new = copy.deepcopy(test_X) + test_X = torch.from_numpy(test_X.to_numpy().astype(numpy.float32)) + test_y = torch.from_numpy(test_y.astype(numpy.float32)) + my_dataset = TensorDataset(tensor_x, tensor_y) + my_test_dataset = TensorDataset(test_X, test_y) + my_dataset_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=False) + + + model = autoencoder(num_features,num_components) + + ####### Define the loss function + + criterion = nn.MSELoss() + + ####### Define the optimization function + optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # If using SGD, convergence does not decrease + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + ####### Set epoch to 300 + + for epoch in range(num_epoch): + total_loss = 0 + for i, (x, y) in enumerate(my_dataset_loader): + _, pred = model(V(x)) + loss = criterion(pred, x) + + + optimizer.zero_grad() + loss.backward() + optimizer.step() + total_loss += loss + if epoch % 10 == 0: + print('\rTraining loss {}'.format(total_loss.data.numpy()),end=" ") + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + model_info = {} + for i, param in enumerate (model.parameters()): + # print(param) + model_info[i] = param.detach().numpy() + model_info['weights'] = model_info[0].T + model_info['bias'] = model_info[1] + ###### Perform dimensionality reduction and visualization based on the trained model + + print('\nGenerate the table...',end="") + + value_info = {} + value_info["max"] = np.max(model_info['bias']) + value_info["min"] = np.min(model_info['bias']) + for ax in range(num_components): + value_info["ax " + str(ax)] = {} + value_info["ax " + str(ax)]["max"] = model_info['bias'][ax] + value_info["ax " + str(ax)]["min"] = model_info['bias'][ax] + + g_table = {} + for f in range(num_features): + g_table['feature ' + str(f)] = {} + for input_value in range(feature_max[f]): + g_table['feature ' + str(f)][input_value] = {} + for ax in range(num_components): + middle_value = copy.deepcopy(input_value * model_info['weights'][f, ax]) + g_table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value + if middle_value > value_info["ax " + str(ax)]["max"]: + value_info["ax " + str(ax)]["max"] = middle_value + if middle_value < value_info["ax " + str(ax)]["min"]: + value_info["ax " + str(ax)]["min"] = middle_value + if middle_value > value_info["max"]: + value_info["max"] = middle_value + if middle_value < value_info["min"]: + value_info["min"] = middle_value + + scale = (2 ** num_bits) / ((value_info["max"] - value_info["min"]) * (num_features+1)) + + Exact_Table = {} + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for input_value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][input_value] = {} + for ax in range(num_components): + middle_value = copy.deepcopy(g_table['feature ' + str(f)][input_value]['ax' + str(ax)]) + middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) + Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value + + Exact_Table['bias'] = {} + for ax in range(num_components): + Exact_Table['bias']['ax' + str(ax)] = np.int(np.floor((model_info['bias'][ax]- value_info["min"])*scale)) + + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + print('Done') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Autoencoder" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']["num components"] = num_components + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'dimension_reduction' + + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + x_ = [] + y_ = [] + for i, (x, y) in enumerate(my_test_dataset): + _, pred = model(V(x)) + dimension = _.data.numpy() + for ax in range(num_components): + sklearn_X_new.values[i,ax] = dimension[ax] - (num_features + 1) * value_info["min"] + # prepare for plot + x_.append(dimension[0]-(num_features+1)*value_info["min"]) + y_.append(dimension[1]-(num_features+1)*value_info["min"]) + + + plot_result = input('- Plot the training result ? (default = n) ') or 'n' + + if plot_result == 'y': + print('plot') + plt.scatter(numpy.array(x_), numpy.array(y_), c=test_y.detach().numpy()) + + for i in range(len(numpy.array(x_))): + plt.annotate(i, (x_[i], y_[i])) + plt.show() + return sklearn_X_new.values + + +def test_tables(sklearn_test_x, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_x = copy.deepcopy(sklearn_test_x) + + + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + for ax in range(num_components): + switch_test_x[i][ax] = copy.deepcopy(Exact_Table['bias']['ax'+str(ax)]) + for f in range(num_features): + ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] + for ax in range(num_components): + switch_test_x[i][ax] += ax_middle["ax"+str(ax)] + # print(sklearn_test_x[i], switch_test_x[i]) + # test_X.values[i] + # switch_test_x.values[i] + for ax in range(num_components): + corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) + print('Pearsons correlation of M/A result and output of Pytorch for axis '+str(ax)+' is: %.4f' % corr) + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/Autoencoder/Type_LB/dedicated_p4.py b/src/models/Autoencoder/Type_LB/dedicated_p4.py index 053ebf4..faaaa71 100755 --- a/src/models/Autoencoder/Type_LB/dedicated_p4.py +++ b/src/models/Autoencoder/Type_LB/dedicated_p4.py @@ -1,298 +1,298 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['num_axis'] = Planter_config['p4 config']["num components"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for ax in range(0, config['num_axis']): - # headers.write(" bit<" + str(config['num_bits']) + "> middle_ax" + str(ax) + ";\n") - headers.write(" bit<32> middle_ax" + str(ax) + ";\n") - - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for ax in range(0, config['num_axis']): - if ax==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) - ingress.write("){\n") - - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_bias(") - for ax in range(0, config['num_axis']): - if ax==0: - ingress.write("bit<" + str(config['num_bits']) + "> bias_ax" + str(ax)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> bias_ax"+str(ax)) - - ingress.write("){\n") - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+ " = (bit<32>)bias_ax"+str(ax) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table bias {\n" - " key = {hdr.Planter.ver:exact;}\n" - " actions={read_bias; NoAction;}\n" - # " const entries = {\n" - # " 0 : "+config['thresh_and_bias'] +"};\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action wrap_back( ){\n") - for ax in range(0, config['num_axis']): - ingress.write(" hdr.Planter.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") - ingress.write(" }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" bias.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" wrap_back();\n") - ingress.write(" /* Swap the MAC addresses */\n" - " bit<48> tmp;\n" - " tmp = hdr.ethernet.dstAddr;\n" - " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" - " hdr.ethernet.srcAddr = tmp;\n" - # " send(3);\n") - " send(ig_intr_md.ingress_port);\n") - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature" + str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature" + str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) - Entry["action_params"] = {} - for ax in range(num_components): - Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.bias" - Entry["match"] = {} - Entry["match"]["hdr.Planter.ver"] = 1 - Entry["action_name"] = "SwitchIngress.read_bias" - Entry["action_params"] = {} - for ax in range(num_components): - Entry["action_params"]["bias_ax" + str(ax)] = np.int(Exact_Table["bias"]['ax' + str(ax)]) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_components = config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) - for ax in range(num_components): - label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] - file.write( str(label) + " ") - file.write("\n") - file.write("\n") - - - - file.write("table_add SwitchIngress.bias read_bias 1 => " ) - - for ax in range(num_components): - file.write(str(label) + " ") - label = np.int(Exact_Table["bias"]['ax' + str(ax)]) - file.write("\n") - file.write("\n") - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for ax in range(0, config['num_axis']): - if ax==0: - tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.bias.add_with_read_bias(" - "1, ") - for ax in range(0, config['num_axis']): - if ax == 0: - tload.write("table['bias']['ax" + str(ax) + "']") - else: - tload.write(", table['bias']['ax" + str(ax) + "']") - - tload.write(")\n\n") - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['num_axis'] = Planter_config['p4 config']["num components"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for ax in range(0, config['num_axis']): + # headers.write(" bit<" + str(config['num_bits']) + "> middle_ax" + str(ax) + ";\n") + headers.write(" bit<32> middle_ax" + str(ax) + ";\n") + + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for ax in range(0, config['num_axis']): + if ax==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) + ingress.write("){\n") + + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_bias(") + for ax in range(0, config['num_axis']): + if ax==0: + ingress.write("bit<" + str(config['num_bits']) + "> bias_ax" + str(ax)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> bias_ax"+str(ax)) + + ingress.write("){\n") + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+ " = (bit<32>)bias_ax"+str(ax) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table bias {\n" + " key = {hdr.Planter.ver:exact;}\n" + " actions={read_bias; NoAction;}\n" + # " const entries = {\n" + # " 0 : "+config['thresh_and_bias'] +"};\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action wrap_back( ){\n") + for ax in range(0, config['num_axis']): + ingress.write(" hdr.Planter.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") + ingress.write(" }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" bias.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" wrap_back();\n") + ingress.write(" /* Swap the MAC addresses */\n" + " bit<48> tmp;\n" + " tmp = hdr.ethernet.dstAddr;\n" + " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" + " hdr.ethernet.srcAddr = tmp;\n" + # " send(3);\n") + " send(ig_intr_md.ingress_port);\n") + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature" + str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature" + str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) + Entry["action_params"] = {} + for ax in range(num_components): + Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.bias" + Entry["match"] = {} + Entry["match"]["hdr.Planter.ver"] = 1 + Entry["action_name"] = "SwitchIngress.read_bias" + Entry["action_params"] = {} + for ax in range(num_components): + Entry["action_params"]["bias_ax" + str(ax)] = np.int(Exact_Table["bias"]['ax' + str(ax)]) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_components = config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) + for ax in range(num_components): + label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] + file.write( str(label) + " ") + file.write("\n") + file.write("\n") + + + + file.write("table_add SwitchIngress.bias read_bias 1 => " ) + + for ax in range(num_components): + file.write(str(label) + " ") + label = np.int(Exact_Table["bias"]['ax' + str(ax)]) + file.write("\n") + file.write("\n") + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for ax in range(0, config['num_axis']): + if ax==0: + tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.bias.add_with_read_bias(" + "1, ") + for ax in range(0, config['num_axis']): + if ax == 0: + tload.write("table['bias']['ax" + str(ax) + "']") + else: + tload.write(", table['bias']['ax" + str(ax) + "']") + + tload.write(")\n\n") + + diff --git a/src/models/Autoencoder/Type_LB/readme.md b/src/models/Autoencoder/Type_LB/readme.md index 7e3a873..52873d0 100644 --- a/src/models/Autoencoder/Type_LB/readme.md +++ b/src/models/Autoencoder/Type_LB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Autoencoder. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Autoencoder. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Autoencoder/Type_LB/table_generator.py b/src/models/Autoencoder/Type_LB/table_generator.py index 16f6468..8b2b640 100755 --- a/src/models/Autoencoder/Type_LB/table_generator.py +++ b/src/models/Autoencoder/Type_LB/table_generator.py @@ -1,289 +1,289 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import torch.nn as nn -from torch.autograd import Variable as V -import torch -from torch.utils.data import DataLoader, Dataset, TensorDataset -from sklearn.datasets import load_iris -from sklearn.preprocessing import MinMaxScaler -import numpy -import matplotlib.pyplot as plt -from sklearn.ensemble import IsolationForest -import numpy as np -import json -import copy -from scipy.stats import pearsonr -from src.functions.json_encoder import * -import math -import time -import time -# print(isinstance(my_dataset,Dataset)) - - -###### Define an autoencoder model -class autoencoder(nn.Module): - def __init__(self, num_features, num_components): - super(autoencoder, self).__init__() - self.encoder = nn.Sequential( - nn.Linear(num_features, num_components), - # nn.Tanh(), - # nn.Linear(3, 2), - ) - self.decoder = nn.Sequential( - # nn.Linear(2, 3), - # nn.Tanh(), - nn.Linear(num_components, num_features), - # nn.Sigmoid() - ) - - def forward(self, x): - encoder = self.encoder(x) - decoder = self.decoder(encoder) - return encoder, decoder - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - Planter_config['model config']['num components'] = np.int(input('- Number of components? (default = 2) ') or '2') - Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') - Planter_config['model config']['batch size'] = np.int(input('- Model batch size? (default = 100) ') or '100') - Planter_config['model config']['num epoch'] = np.int(input('- Number of training epoch? (default = 50) ') or '50') - Planter_config['model config']['number of bits'] = np.int( - input('- Number of bits for each action data? (default = 16) ') or '16') - - num_bits = Planter_config['model config']['number of bits'] - - num_components = Planter_config['model config']['num components'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - learning_rate = Planter_config['model config']['learning rate'] - batch_size = Planter_config['model config']['batch size'] - num_epoch = Planter_config['model config']['num epoch'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [max(t_t)+1] - ###### Normalize the input as the autoencoder only uses the input - # MMScaler = MinMaxScaler() - # x = MMScaler.fit_transform(x) - # iforestX = x - - ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 - tensor_x = torch.from_numpy(train_X.to_numpy().astype(numpy.float32)) - tensor_y = torch.from_numpy(train_y.astype(numpy.float32)) - # X_new = copy.deepcopy(test_X) - sklearn_X_new = copy.deepcopy(test_X) - test_X = torch.from_numpy(test_X.to_numpy().astype(numpy.float32)) - test_y = torch.from_numpy(test_y.astype(numpy.float32)) - my_dataset = TensorDataset(tensor_x, tensor_y) - my_test_dataset = TensorDataset(test_X, test_y) - my_dataset_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=False) - - - model = autoencoder(num_features,num_components) - - ####### Define the loss function - - criterion = nn.MSELoss() - - ####### Define the optimization function - optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # If using SGD, convergence does not decrease - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - ####### Set epoch to 300 - - for epoch in range(num_epoch): - total_loss = 0 - for i, (x, y) in enumerate(my_dataset_loader): - _, pred = model(V(x)) - loss = criterion(pred, x) - - - optimizer.zero_grad() - loss.backward() - optimizer.step() - total_loss += loss - if epoch % 10 == 0: - print('\rTraining loss {}'.format(total_loss.data.numpy()),end=" ") - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - model_info = {} - for i, param in enumerate (model.parameters()): - # print(param) - model_info[i] = param.detach().numpy() - model_info['weights'] = model_info[0].T - model_info['bias'] = model_info[1] - ###### Perform dimensionality reduction and visualization based on the trained model - - print('\nGenerate the table...',end="") - - value_info = {} - value_info["max"] = np.max(model_info['bias']) - value_info["min"] = np.min(model_info['bias']) - for ax in range(num_components): - value_info["ax " + str(ax)] = {} - value_info["ax " + str(ax)]["max"] = model_info['bias'][ax] - value_info["ax " + str(ax)]["min"] = model_info['bias'][ax] - - g_table = {} - for f in range(num_features): - g_table['feature ' + str(f)] = {} - for input_value in range(feature_max[f]): - g_table['feature ' + str(f)][input_value] = {} - for ax in range(num_components): - middle_value = copy.deepcopy(input_value * model_info['weights'][f, ax]) - g_table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value - if middle_value > value_info["ax " + str(ax)]["max"]: - value_info["ax " + str(ax)]["max"] = middle_value - if middle_value < value_info["ax " + str(ax)]["min"]: - value_info["ax " + str(ax)]["min"] = middle_value - if middle_value > value_info["max"]: - value_info["max"] = middle_value - if middle_value < value_info["min"]: - value_info["min"] = middle_value - - scale = (2 ** num_bits) / ((value_info["max"] - value_info["min"]) * (num_features+1)) - - Exact_Table = {} - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for input_value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][input_value] = {} - for ax in range(num_components): - middle_value = copy.deepcopy(g_table['feature ' + str(f)][input_value]['ax' + str(ax)]) - middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) - Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value - - Exact_Table['bias'] = {} - for ax in range(num_components): - Exact_Table['bias']['ax' + str(ax)] = np.int(np.floor((model_info['bias'][ax]- value_info["min"])*scale)) - - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - print('Done') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Autoencoder" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']["num components"] = num_components - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'dimension_reduction' - - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - x_ = [] - y_ = [] - for i, (x, y) in enumerate(my_test_dataset): - _, pred = model(V(x)) - dimension = _.data.numpy() - for ax in range(num_components): - sklearn_X_new.values[i,ax] = dimension[ax] - (num_features + 1) * value_info["min"] - # prepare for plot - x_.append(dimension[0]-(num_features+1)*value_info["min"]) - y_.append(dimension[1]-(num_features+1)*value_info["min"]) - - - plot_result = input('- Plot the training result ? (default = n) ') or 'n' - - if plot_result == 'y': - print('plot') - plt.scatter(numpy.array(x_), numpy.array(y_), c=test_y.detach().numpy()) - - for i in range(len(numpy.array(x_))): - plt.annotate(i, (x_[i], y_[i])) - plt.show() - return sklearn_X_new.values - - -def test_tables(sklearn_test_x, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_x = copy.deepcopy(sklearn_test_x) - - - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - for ax in range(num_components): - switch_test_x[i][ax] = copy.deepcopy(Exact_Table['bias']['ax'+str(ax)]) - for f in range(num_features): - ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] - for ax in range(num_components): - switch_test_x[i][ax] += ax_middle["ax"+str(ax)] - # print(sklearn_test_x[i], switch_test_x[i]) - # test_X.values[i] - # switch_test_x.values[i] - for ax in range(num_components): - corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) - print('Pearsons correlation of M/A PCA and output of Pytorch for axis '+str(ax)+' is: %.4f' % corr) - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import torch.nn as nn +from torch.autograd import Variable as V +import torch +from torch.utils.data import DataLoader, Dataset, TensorDataset +from sklearn.datasets import load_iris +from sklearn.preprocessing import MinMaxScaler +import numpy +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +import numpy as np +import json +import copy +from scipy.stats import pearsonr +from src.functions.json_encoder import * +import math +import time +import time +# print(isinstance(my_dataset,Dataset)) + + +###### Define an autoencoder model +class autoencoder(nn.Module): + def __init__(self, num_features, num_components): + super(autoencoder, self).__init__() + self.encoder = nn.Sequential( + nn.Linear(num_features, num_components), + # nn.Tanh(), + # nn.Linear(3, 2), + ) + self.decoder = nn.Sequential( + # nn.Linear(2, 3), + # nn.Tanh(), + nn.Linear(num_components, num_features), + # nn.Sigmoid() + ) + + def forward(self, x): + encoder = self.encoder(x) + decoder = self.decoder(encoder) + return encoder, decoder + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + Planter_config['model config']['num components'] = np.int(input('- Number of components? (default = 2) ') or '2') + Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') + Planter_config['model config']['batch size'] = np.int(input('- Model batch size? (default = 100) ') or '100') + Planter_config['model config']['num epoch'] = np.int(input('- Number of training epoch? (default = 50) ') or '50') + Planter_config['model config']['number of bits'] = np.int( + input('- Number of bits for each action data? (default = 16) ') or '16') + + num_bits = Planter_config['model config']['number of bits'] + + num_components = Planter_config['model config']['num components'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + learning_rate = Planter_config['model config']['learning rate'] + batch_size = Planter_config['model config']['batch size'] + num_epoch = Planter_config['model config']['num epoch'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [max(t_t)+1] + ###### Normalize the input as the autoencoder only uses the input + # MMScaler = MinMaxScaler() + # x = MMScaler.fit_transform(x) + # iforestX = x + + ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 + tensor_x = torch.from_numpy(train_X.to_numpy().astype(numpy.float32)) + tensor_y = torch.from_numpy(train_y.astype(numpy.float32)) + # X_new = copy.deepcopy(test_X) + sklearn_X_new = copy.deepcopy(test_X) + test_X = torch.from_numpy(test_X.to_numpy().astype(numpy.float32)) + test_y = torch.from_numpy(test_y.astype(numpy.float32)) + my_dataset = TensorDataset(tensor_x, tensor_y) + my_test_dataset = TensorDataset(test_X, test_y) + my_dataset_loader = DataLoader(my_dataset, batch_size=batch_size, shuffle=False) + + + model = autoencoder(num_features,num_components) + + ####### Define the loss function + + criterion = nn.MSELoss() + + ####### Define the optimization function + optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # If using SGD, convergence does not decrease + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + ####### Set epoch to 300 + + for epoch in range(num_epoch): + total_loss = 0 + for i, (x, y) in enumerate(my_dataset_loader): + _, pred = model(V(x)) + loss = criterion(pred, x) + + + optimizer.zero_grad() + loss.backward() + optimizer.step() + total_loss += loss + if epoch % 10 == 0: + print('\rTraining loss {}'.format(total_loss.data.numpy()),end=" ") + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + model_info = {} + for i, param in enumerate (model.parameters()): + # print(param) + model_info[i] = param.detach().numpy() + model_info['weights'] = model_info[0].T + model_info['bias'] = model_info[1] + ###### Perform dimensionality reduction and visualization based on the trained model + + print('\nGenerate the table...',end="") + + value_info = {} + value_info["max"] = np.max(model_info['bias']) + value_info["min"] = np.min(model_info['bias']) + for ax in range(num_components): + value_info["ax " + str(ax)] = {} + value_info["ax " + str(ax)]["max"] = model_info['bias'][ax] + value_info["ax " + str(ax)]["min"] = model_info['bias'][ax] + + g_table = {} + for f in range(num_features): + g_table['feature ' + str(f)] = {} + for input_value in range(feature_max[f]): + g_table['feature ' + str(f)][input_value] = {} + for ax in range(num_components): + middle_value = copy.deepcopy(input_value * model_info['weights'][f, ax]) + g_table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value + if middle_value > value_info["ax " + str(ax)]["max"]: + value_info["ax " + str(ax)]["max"] = middle_value + if middle_value < value_info["ax " + str(ax)]["min"]: + value_info["ax " + str(ax)]["min"] = middle_value + if middle_value > value_info["max"]: + value_info["max"] = middle_value + if middle_value < value_info["min"]: + value_info["min"] = middle_value + + scale = (2 ** num_bits) / ((value_info["max"] - value_info["min"]) * (num_features+1)) + + Exact_Table = {} + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for input_value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][input_value] = {} + for ax in range(num_components): + middle_value = copy.deepcopy(g_table['feature ' + str(f)][input_value]['ax' + str(ax)]) + middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) + Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value + + Exact_Table['bias'] = {} + for ax in range(num_components): + Exact_Table['bias']['ax' + str(ax)] = np.int(np.floor((model_info['bias'][ax]- value_info["min"])*scale)) + + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + print('Done') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Autoencoder" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']["num components"] = num_components + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'dimension_reduction' + + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + x_ = [] + y_ = [] + for i, (x, y) in enumerate(my_test_dataset): + _, pred = model(V(x)) + dimension = _.data.numpy() + for ax in range(num_components): + sklearn_X_new.values[i,ax] = dimension[ax] - (num_features + 1) * value_info["min"] + # prepare for plot + x_.append(dimension[0]-(num_features+1)*value_info["min"]) + y_.append(dimension[1]-(num_features+1)*value_info["min"]) + + + plot_result = input('- Plot the training result ? (default = n) ') or 'n' + + if plot_result == 'y': + print('plot') + plt.scatter(numpy.array(x_), numpy.array(y_), c=test_y.detach().numpy()) + + for i in range(len(numpy.array(x_))): + plt.annotate(i, (x_[i], y_[i])) + plt.show() + return sklearn_X_new.values + + +def test_tables(sklearn_test_x, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_x = copy.deepcopy(sklearn_test_x) + + + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + for ax in range(num_components): + switch_test_x[i][ax] = copy.deepcopy(Exact_Table['bias']['ax'+str(ax)]) + for f in range(num_features): + ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] + for ax in range(num_components): + switch_test_x[i][ax] += ax_middle["ax"+str(ax)] + # print(sklearn_test_x[i], switch_test_x[i]) + # test_X.values[i] + # switch_test_x.values[i] + for ax in range(num_components): + corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) + print('Pearsons correlation of M/A PCA and output of Pytorch for axis '+str(ax)+' is: %.4f' % corr) + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/Autoencoder/readme.md b/src/models/Autoencoder/readme.md index 2bf5d7c..1d0397a 100644 --- a/src/models/Autoencoder/readme.md +++ b/src/models/Autoencoder/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported autoencoder. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported autoencoder. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Bayes/Type_1/dedicated_p4.py b/src/models/Bayes/Type_1/dedicated_p4.py index 4580327..c6b810e 100755 --- a/src/models/Bayes/Type_1/dedicated_p4.py +++ b/src/models/Bayes/Type_1/dedicated_p4.py @@ -1,333 +1,333 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+" = meta.prob_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_class_prob(") - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) - - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table class_prob {\n" - " key = {meta.flag:exact;}\n" - " actions={read_class_prob; NoAction;}\n" - # " const entries = {\n" - # " 0 : "+config['thresh_and_bias'] +"};\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) - +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"meta.result = "+str(np.int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" - +(config['num_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - ingress.write(" class_prob.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" compare();\n\n") - - - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["f"+str(f)+"c"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.class_prob" - Entry["match"] = {} - Entry["match"]["meta.flag"] = 1 - Entry["action_name"] = "SwitchIngress.read_class_prob" - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["p_c" + str(c)] = Exact_Table["class prob"]["class " + str(c)] - Table_entries += [Entry] - - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => ") - for c in range(num_classes): - file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") - file.write("\n") - file.write("\n") - - - - file.write("table_add SwitchIngress.class_prob read_class_prob 1 => ") - for c in range(num_classes): - file.write(str(Exact_Table["class prob"]["class " + str(c)])+" ") - file.write("\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load thresh_and_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.class_prob.add_with_read_class_prob(" - "1, ") - for c in range(0, config['num_classes']): - if c == 0: - tload.write("table['class prob']['class " + str(c) + "']") - else: - tload.write(", table['class prob']['class " + str(c) + "']") - - tload.write(")\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+" = meta.prob_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_class_prob(") + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) + + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table class_prob {\n" + " key = {meta.flag:exact;}\n" + " actions={read_class_prob; NoAction;}\n" + # " const entries = {\n" + # " 0 : "+config['thresh_and_bias'] +"};\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) + +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"meta.result = "+str(np.int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" + +(config['num_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + ingress.write(" class_prob.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" compare();\n\n") + + + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["f"+str(f)+"c"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.class_prob" + Entry["match"] = {} + Entry["match"]["meta.flag"] = 1 + Entry["action_name"] = "SwitchIngress.read_class_prob" + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["p_c" + str(c)] = Exact_Table["class prob"]["class " + str(c)] + Table_entries += [Entry] + + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => ") + for c in range(num_classes): + file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") + file.write("\n") + file.write("\n") + + + + file.write("table_add SwitchIngress.class_prob read_class_prob 1 => ") + for c in range(num_classes): + file.write(str(Exact_Table["class prob"]["class " + str(c)])+" ") + file.write("\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load thresh_and_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.class_prob.add_with_read_class_prob(" + "1, ") + for c in range(0, config['num_classes']): + if c == 0: + tload.write("table['class prob']['class " + str(c) + "']") + else: + tload.write(", table['class prob']['class " + str(c) + "']") + + tload.write(")\n\n") diff --git a/src/models/Bayes/Type_1/readme.md b/src/models/Bayes/Type_1/readme.md index 19d16ce..ba1e88a 100644 --- a/src/models/Bayes/Type_1/readme.md +++ b/src/models/Bayes/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Bayes/Type_1/table_generator.py b/src/models/Bayes/Type_1/table_generator.py index d697241..86f288f 100755 --- a/src/models/Bayes/Type_1/table_generator.py +++ b/src/models/Bayes/Type_1/table_generator.py @@ -1,275 +1,275 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -import pydotplus -import copy -import math - -import os -import sys -# import grpc -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * - -def ten_to_bin(num,count): - num = int(num) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class - part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 - part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - return part_1*np.exp(-part_2_u/part_2_l) - - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # fit - clf = GaussianNB() - clf.fit(train_X, train_y) - sklearn_y_predict = clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_parmeters={} - for c in range(num_classes): - model_parmeters['c'+str(c)] = {} - for f in range(num_features): - model_parmeters['c' + str(c)]["f"+str(f)] = {} - model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) - model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] - - value_info = {} - value_info["max"] = 0 - value_info["min"] = 0 - for f in range(num_features): - value_info["f" + str(f)] = {} - value_info["f"+str(f)]["max"] = 0 - value_info["f"+str(f)]["min"] = 0 - - Bayes_separate_table = {} - for f in range(num_features): - Bayes_separate_table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Bayes_separate_table['feature '+str(f)][inputs]={} - for c in range(num_classes): - if calculate_prob(inputs,f,c,model_parmeters)==0: - value = value_info["min"] - else: - value = math.log(calculate_prob(inputs,f,c,model_parmeters),2) - Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - Bayes_separate_table["class prob"] = {} - for c in range(num_classes): - value = clf.class_prior_[c] - Bayes_separate_table["class prob"]['class '+str(c)]= math.log(value,2) - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - scale = (2**num_bits)/(num_features+1) - Exact_Table = {} - Exact_Table['class prob'] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) - # x = copy.deepcopy(int(scale * Bayes_separate_table['class prob']['class ' + str(c)])) - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['class prob']['class '+str(c)] = np.int(np.round(value*scale)) - - - for f in range(num_features): - Exact_Table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Exact_Table['feature ' + str(f)][inputs] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = np.int(np.round(value*scale)) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Bayes" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - class_prob = np.zeros(num_classes).tolist() - - for c in range(num_classes): - class_prob[c] = Exact_Table['class prob']['class '+str(c)] - - - for f in range(num_features): - for c in range(num_classes): - class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] - # print(class_prob) - switch_prediction = class_prob.index(np.max(class_prob)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.4}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - # sys.stdout.flush() - print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) - result = classification_report(test_y, switch_test_y, digits=4) - print('\n', result) - - - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['resource info'] = {} - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - - logical_dependency_memory = np.zeros(num_features+1) - logical_dependency_memory[0] = num_classes * ( Planter_config['p4 config']["action data bits"] + 8) - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - for s in range(num_features): - logical_dependency_memory[s+1] = Planter_config['p4 config']["feature tbl len"][s]*(Planter_config['p4 config']["action data bits"]+32) - - Planter_config['resource info']['logical memory per dependency'] = logical_dependency_memory.astype(int) - Planter_config['resource info']['total table bits'] = np.sum(logical_dependency_memory.astype(int)) - # print('The predicted number of bits usage per dependency stage:\n', logical_dependency_memory.astype(int), ', in total:', np.sum(logical_dependency_memory.astype(int)), "bits") - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is updated') - - - -if __name__ == "__main__": - resource_prediction() - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +import pydotplus +import copy +import math + +import os +import sys +# import grpc +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * + +def ten_to_bin(num,count): + num = int(num) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class + part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 + part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + return part_1*np.exp(-part_2_u/part_2_l) + + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # fit + clf = GaussianNB() + clf.fit(train_X, train_y) + sklearn_y_predict = clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_parmeters={} + for c in range(num_classes): + model_parmeters['c'+str(c)] = {} + for f in range(num_features): + model_parmeters['c' + str(c)]["f"+str(f)] = {} + model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) + model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] + + value_info = {} + value_info["max"] = 0 + value_info["min"] = 0 + for f in range(num_features): + value_info["f" + str(f)] = {} + value_info["f"+str(f)]["max"] = 0 + value_info["f"+str(f)]["min"] = 0 + + Bayes_separate_table = {} + for f in range(num_features): + Bayes_separate_table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Bayes_separate_table['feature '+str(f)][inputs]={} + for c in range(num_classes): + if calculate_prob(inputs,f,c,model_parmeters)==0: + value = value_info["min"] + else: + value = math.log(calculate_prob(inputs,f,c,model_parmeters),2) + Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + Bayes_separate_table["class prob"] = {} + for c in range(num_classes): + value = clf.class_prior_[c] + Bayes_separate_table["class prob"]['class '+str(c)]= math.log(value,2) + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + scale = (2**num_bits)/(num_features+1) + Exact_Table = {} + Exact_Table['class prob'] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) + # x = copy.deepcopy(int(scale * Bayes_separate_table['class prob']['class ' + str(c)])) + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['class prob']['class '+str(c)] = np.int(np.round(value*scale)) + + + for f in range(num_features): + Exact_Table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Exact_Table['feature ' + str(f)][inputs] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = np.int(np.round(value*scale)) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Bayes" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + class_prob = np.zeros(num_classes).tolist() + + for c in range(num_classes): + class_prob[c] = Exact_Table['class prob']['class '+str(c)] + + + for f in range(num_features): + for c in range(num_classes): + class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] + # print(class_prob) + switch_prediction = class_prob.index(np.max(class_prob)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.4}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + # sys.stdout.flush() + print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) + result = classification_report(test_y, switch_test_y, digits=4) + print('\n', result) + + + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['resource info'] = {} + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + + logical_dependency_memory = np.zeros(num_features+1) + logical_dependency_memory[0] = num_classes * ( Planter_config['p4 config']["action data bits"] + 8) + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + for s in range(num_features): + logical_dependency_memory[s+1] = Planter_config['p4 config']["feature tbl len"][s]*(Planter_config['p4 config']["action data bits"]+32) + + Planter_config['resource info']['logical memory per dependency'] = logical_dependency_memory.astype(int) + Planter_config['resource info']['total table bits'] = np.sum(logical_dependency_memory.astype(int)) + # print('The predicted number of bits usage per dependency stage:\n', logical_dependency_memory.astype(int), ', in total:', np.sum(logical_dependency_memory.astype(int)), "bits") + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is updated') + + + +if __name__ == "__main__": + resource_prediction() + diff --git a/src/models/Bayes/Type_2/dedicated_p4.py b/src/models/Bayes/Type_2/dedicated_p4.py index 97e0734..bdb459e 100755 --- a/src/models/Bayes/Type_2/dedicated_p4.py +++ b/src/models/Bayes/Type_2/dedicated_p4.py @@ -1,350 +1,350 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['c_tbl_len'] = Planter_config['p4 config']["code tbl len"] - config['prob_bits'] = Planter_config['p4 config']["prob data bits"] - - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for f in range(0, config['num_features']): - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) +"_c"+str(c)+";\n") - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['prob_bits']) + "> prob_c" + str(c) + ";\n") - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['prob_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> code" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> code"+str(c)) - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.f" + str(f)+"_c"+str(c)+" = code" + str(c)+" ;\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - for c in range(0, config['num_classes']): - ingress.write(" action read_prob"+str(c)+"(" - "bit<" + str(config['prob_bits']) + "> prob"+str(c)+"){\n") - ingress.write(" meta.prob_c" + str(c)+ " = prob"+str(c) +";\n") - ingress.write(" }\n\n") - - for c in range(0, config['num_classes']): - ingress.write(" table lookup_prob"+str(c)+" {\n" - " key = {\n") - for f in range(0, config['num_features']): - ingress.write(" meta.f"+str(f)+"_c"+str(c)+":exact;\n") - ingress.write(" }\n" - " actions={read_prob"+str(c)+"; NoAction;}\n" - " default_action = NoAction;\n" - " size = " + str( config['c_tbl_len']) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - # " hdr.Planter.result = (bit<32>) meta.result_hp1;\n" - " }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) - +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"meta.result = "+str(np.int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - # txt.write(" "+c_n*" "+"meta.compare" + str(np.int(compare[0])) - # + "_" + str(np.int(compare[1])) + " = meta.prob_c" + str(np.int(compare[0])) - # + " - meta.prob_c" + str( np.int(compare[1])) + ";\n") - label = compare[1] - # print(con_list, c_n) - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" - +(config['prob_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - for c in range(0, config['num_classes']): - ingress.write(" lookup_prob" + str(c) + ".apply();\n") - ingress.write(" compare();\n") - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["code"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] - Table_entries += [Entry] - - for c in range(num_classes): - for idx in Exact_Table['get total prob']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_prob"+str(c) - Entry["match"] = {} - for f in range(num_features): - Entry["match"]["meta.f"+str(f)+"_c"+str(c)] = np.int(Exact_Table['get total prob'][idx]['product_list'][f]) - Entry["action_name"] = "SwitchIngress.read_prob"+str(c) - Entry["action_params"] = {} - Entry["action_params"]["prob"+str(c)] = np.int(Exact_Table['get total prob'][idx]["class"+str(c)]) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => ") - for c in range(num_classes): - file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") - file.write("\n") - file.write("\n") - - - - for c in range(num_classes): - for idx in Exact_Table['get total prob']: - file.write("table_add SwitchIngress.lookup_prob"+str(c)+" read_prob"+str(c)+" ") - for f in range(num_features): - file.write(str(np.int( Exact_Table['get total prob'][idx]['product_list'][f]))+" ") - file.write("=> "+str(np.int(Exact_Table['get total prob'][idx]["class" + str(c)]))+"\n") - file.write("\n") - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - for c in range(0, config['num_classes']): - - tload.write("print('load lookup_prob"+str(c)+" with',len(table['get total prob'].keys()),'entries')\n") - tload.write("for key in table['get total prob']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_prob" + str(c) + - ".add_with_read_prob" + str(c) + "(") - for f in range(config['num_features']): - if f==0: - tload.write("table['get total prob'][key]['product_list'][" + str(f) + "]") - else: - tload.write(", table['get total prob'][key]['product_list'][" + str(f) + "]") - - tload.write(", int(table['get total prob'][key]['class" + str(c) + "'])") - tload.write(")\n") - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['c_tbl_len'] = Planter_config['p4 config']["code tbl len"] + config['prob_bits'] = Planter_config['p4 config']["prob data bits"] + + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for f in range(0, config['num_features']): + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) +"_c"+str(c)+";\n") + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['prob_bits']) + "> prob_c" + str(c) + ";\n") + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['prob_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> code" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> code"+str(c)) + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.f" + str(f)+"_c"+str(c)+" = code" + str(c)+" ;\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + for c in range(0, config['num_classes']): + ingress.write(" action read_prob"+str(c)+"(" + "bit<" + str(config['prob_bits']) + "> prob"+str(c)+"){\n") + ingress.write(" meta.prob_c" + str(c)+ " = prob"+str(c) +";\n") + ingress.write(" }\n\n") + + for c in range(0, config['num_classes']): + ingress.write(" table lookup_prob"+str(c)+" {\n" + " key = {\n") + for f in range(0, config['num_features']): + ingress.write(" meta.f"+str(f)+"_c"+str(c)+":exact;\n") + ingress.write(" }\n" + " actions={read_prob"+str(c)+"; NoAction;}\n" + " default_action = NoAction;\n" + " size = " + str( config['c_tbl_len']) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + # " hdr.Planter.result = (bit<32>) meta.result_hp1;\n" + " }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) + +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"meta.result = "+str(np.int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + # txt.write(" "+c_n*" "+"meta.compare" + str(np.int(compare[0])) + # + "_" + str(np.int(compare[1])) + " = meta.prob_c" + str(np.int(compare[0])) + # + " - meta.prob_c" + str( np.int(compare[1])) + ";\n") + label = compare[1] + # print(con_list, c_n) + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" + +(config['prob_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + for c in range(0, config['num_classes']): + ingress.write(" lookup_prob" + str(c) + ".apply();\n") + ingress.write(" compare();\n") + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["code"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] + Table_entries += [Entry] + + for c in range(num_classes): + for idx in Exact_Table['get total prob']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_prob"+str(c) + Entry["match"] = {} + for f in range(num_features): + Entry["match"]["meta.f"+str(f)+"_c"+str(c)] = np.int(Exact_Table['get total prob'][idx]['product_list'][f]) + Entry["action_name"] = "SwitchIngress.read_prob"+str(c) + Entry["action_params"] = {} + Entry["action_params"]["prob"+str(c)] = np.int(Exact_Table['get total prob'][idx]["class"+str(c)]) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => ") + for c in range(num_classes): + file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") + file.write("\n") + file.write("\n") + + + + for c in range(num_classes): + for idx in Exact_Table['get total prob']: + file.write("table_add SwitchIngress.lookup_prob"+str(c)+" read_prob"+str(c)+" ") + for f in range(num_features): + file.write(str(np.int( Exact_Table['get total prob'][idx]['product_list'][f]))+" ") + file.write("=> "+str(np.int(Exact_Table['get total prob'][idx]["class" + str(c)]))+"\n") + file.write("\n") + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + for c in range(0, config['num_classes']): + + tload.write("print('load lookup_prob"+str(c)+" with',len(table['get total prob'].keys()),'entries')\n") + tload.write("for key in table['get total prob']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_prob" + str(c) + + ".add_with_read_prob" + str(c) + "(") + for f in range(config['num_features']): + if f==0: + tload.write("table['get total prob'][key]['product_list'][" + str(f) + "]") + else: + tload.write(", table['get total prob'][key]['product_list'][" + str(f) + "]") + + tload.write(", int(table['get total prob'][key]['class" + str(c) + "'])") + tload.write(")\n") + + + + diff --git a/src/models/Bayes/Type_2/readme.md b/src/models/Bayes/Type_2/readme.md index 19d16ce..ba1e88a 100644 --- a/src/models/Bayes/Type_2/readme.md +++ b/src/models/Bayes/Type_2/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Bayes/Type_2/table_generator.py b/src/models/Bayes/Type_2/table_generator.py index b428e87..e85362a 100755 --- a/src/models/Bayes/Type_2/table_generator.py +++ b/src/models/Bayes/Type_2/table_generator.py @@ -1,267 +1,267 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -import pydotplus -import copy -import math -import time - -import os -import sys -# import grpc -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * - -def ten_to_bin(num,count): - num = int(num) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class - part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 - part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - return part_1*np.exp(-part_2_u/part_2_l) - - -def get_final_prob(feature_num, product_list, num_classes, num_features, scale, Exact_Table, Bayes_separate_table, num): - if feature_num == num_features: - Exact_Table['get total prob'][num] = {} - Exact_Table['get total prob'][num]['product_list'] = copy.deepcopy(product_list) - value = 1 - for f in range(num_features): - value = value * product_list[f] - for c in range(num_classes): - Exact_Table['get total prob'][num]['class'+str(c)] = np.round(copy.deepcopy(value*Bayes_separate_table['class prob']['class '+str(c)])) - # print(product_list,Bayes_separate_table['class prob']['class '+str(c)], Exact_Table['get total prob'][num]['class'+str(c)]) - # print(Exact_Table['get total prob'][num]) - num += 1 - return Exact_Table, num - else: - for value in range(scale+1): - product_list[feature_num] = value - feature_num += 1 - Exact_Table, num = get_final_prob(feature_num, product_list, num_classes, num_features, scale, Exact_Table, Bayes_separate_table, num) - feature_num -= 1 - return Exact_Table, num - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['prob partition'] = np.int(input('- Number prob partition for each feature\'s posterior probability? (default = 6) ') or '6') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - prob_partition = Planter_config['model config']['prob partition'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # fit - clf = GaussianNB() - clf.fit(train_X, train_y) - sklearn_y_predict = clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_parmeters={} - for c in range(num_classes): - model_parmeters['c'+str(c)] = {} - for f in range(num_features): - model_parmeters['c' + str(c)]["f"+str(f)] = {} - model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) - # model_parmeters['c' + str(c)]["f" + str(f)]['std'] = clf.sigma_[c, f] - model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] - - value_info = {} - for f in range(num_features): - value_info["f" + str(f)] = {} - value_info["f"+str(f)]["max"] = 0 - value_info["f"+str(f)]["min"] = 0 - Bayes_separate_table = {} - for f in range(num_features): - Bayes_separate_table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Bayes_separate_table['feature '+str(f)][inputs]={} - # class_prob = probability_f_No(feature,i,model_parmeters, classes = 2) - for c in range(num_classes): - value = calculate_prob(inputs,f,c,model_parmeters) - Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value - if value > value_info["f"+str(f)]["max"]: - value_info["f"+str(f)]["max"] = value - if value < value_info["f"+str(f)]["min"]: - value_info["f"+str(f)]["min"] = value - - Bayes_separate_table["class prob"] = {} - for c in range(num_classes): - Bayes_separate_table["class prob"]['class '+str(c)]= clf.class_prior_[c] - - scale = prob_partition - for c in range(num_classes): - min_x = value_info["f" + str(f)]["min"] - max_x = value_info["f" + str(f)]["max"] - x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) - # x = copy.deepcopy(int(scale * Bayes_separate_table['class prob']['class ' + str(c)])) - value = Single_MaxMinNormalization(x, min_x, max_x) - Bayes_separate_table['class prob']['class '+str(c)] = copy.deepcopy(value*scale) - - - for f in range(num_features): - for inputs in range(0,feature_max[f]+1): - for c in range(num_classes): - min_x = value_info["f"+str(f)]["min"] - max_x = value_info["f"+str(f)]["max"] - x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] - value = Single_MaxMinNormalization(x, min_x, max_x) - Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = np.int(value*scale) - - - - Exact_Table = {} - Exact_Table['get total prob'] = {} - Exact_Table, _ = get_final_prob(0, np.zeros(num_features).tolist(), num_classes, num_features, scale, Exact_Table,Bayes_separate_table, 0) - for f in range(num_features): - Exact_Table['feature '+str(f)] = Bayes_separate_table['feature '+str(f)] - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Bayes" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = np.int(np.ceil(math.log(prob_partition+1,2))) - Planter_config['p4 config']["prob data bits"] = np.int(np.ceil(math.log(prob_partition**(1+num_features),2))) - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']["code tbl len"] = len(Exact_Table['get total prob'].keys()) - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - prob_list = {} - for c in range(num_classes): - prob_list['c'+str(c)] = np.zeros(num_features).tolist() - - for f in range(num_features): - for c in range(num_classes): - prob_list['c' + str(c)][f] = Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] - - class_prob = np.zeros(num_classes).tolist() - for c in range(num_classes): - match_or_not = False - for index in Exact_Table['get total prob']: - if prob_list['c' + str(c)]==Exact_Table['get total prob'][index]['product_list']: - class_prob[c] = Exact_Table['get total prob'][index]['class'+str(c)] - match_or_not = True - break - if not match_or_not: - print('Get total prob table not matched') - switch_prediction = class_prob.index(np.max(class_prob)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i+1], test_y[:i+1]), accuracy_score(sklearn_test_y[:i+1], test_y[:i+1])), - end="") - - - print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) - result = classification_report(test_y, switch_test_y, digits=4) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +import pydotplus +import copy +import math +import time + +import os +import sys +# import grpc +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * + +def ten_to_bin(num,count): + num = int(num) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class + part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 + part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + return part_1*np.exp(-part_2_u/part_2_l) + + +def get_final_prob(feature_num, product_list, num_classes, num_features, scale, Exact_Table, Bayes_separate_table, num): + if feature_num == num_features: + Exact_Table['get total prob'][num] = {} + Exact_Table['get total prob'][num]['product_list'] = copy.deepcopy(product_list) + value = 1 + for f in range(num_features): + value = value * product_list[f] + for c in range(num_classes): + Exact_Table['get total prob'][num]['class'+str(c)] = np.round(copy.deepcopy(value*Bayes_separate_table['class prob']['class '+str(c)])) + # print(product_list,Bayes_separate_table['class prob']['class '+str(c)], Exact_Table['get total prob'][num]['class'+str(c)]) + # print(Exact_Table['get total prob'][num]) + num += 1 + return Exact_Table, num + else: + for value in range(scale+1): + product_list[feature_num] = value + feature_num += 1 + Exact_Table, num = get_final_prob(feature_num, product_list, num_classes, num_features, scale, Exact_Table, Bayes_separate_table, num) + feature_num -= 1 + return Exact_Table, num + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['prob partition'] = np.int(input('- Number prob partition for each feature\'s posterior probability? (default = 6) ') or '6') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + prob_partition = Planter_config['model config']['prob partition'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # fit + clf = GaussianNB() + clf.fit(train_X, train_y) + sklearn_y_predict = clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_parmeters={} + for c in range(num_classes): + model_parmeters['c'+str(c)] = {} + for f in range(num_features): + model_parmeters['c' + str(c)]["f"+str(f)] = {} + model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) + # model_parmeters['c' + str(c)]["f" + str(f)]['std'] = clf.sigma_[c, f] + model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] + + value_info = {} + for f in range(num_features): + value_info["f" + str(f)] = {} + value_info["f"+str(f)]["max"] = 0 + value_info["f"+str(f)]["min"] = 0 + Bayes_separate_table = {} + for f in range(num_features): + Bayes_separate_table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Bayes_separate_table['feature '+str(f)][inputs]={} + # class_prob = probability_f_No(feature,i,model_parmeters, classes = 2) + for c in range(num_classes): + value = calculate_prob(inputs,f,c,model_parmeters) + Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value + if value > value_info["f"+str(f)]["max"]: + value_info["f"+str(f)]["max"] = value + if value < value_info["f"+str(f)]["min"]: + value_info["f"+str(f)]["min"] = value + + Bayes_separate_table["class prob"] = {} + for c in range(num_classes): + Bayes_separate_table["class prob"]['class '+str(c)]= clf.class_prior_[c] + + scale = prob_partition + for c in range(num_classes): + min_x = value_info["f" + str(f)]["min"] + max_x = value_info["f" + str(f)]["max"] + x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) + # x = copy.deepcopy(int(scale * Bayes_separate_table['class prob']['class ' + str(c)])) + value = Single_MaxMinNormalization(x, min_x, max_x) + Bayes_separate_table['class prob']['class '+str(c)] = copy.deepcopy(value*scale) + + + for f in range(num_features): + for inputs in range(0,feature_max[f]+1): + for c in range(num_classes): + min_x = value_info["f"+str(f)]["min"] + max_x = value_info["f"+str(f)]["max"] + x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] + value = Single_MaxMinNormalization(x, min_x, max_x) + Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = np.int(value*scale) + + + + Exact_Table = {} + Exact_Table['get total prob'] = {} + Exact_Table, _ = get_final_prob(0, np.zeros(num_features).tolist(), num_classes, num_features, scale, Exact_Table,Bayes_separate_table, 0) + for f in range(num_features): + Exact_Table['feature '+str(f)] = Bayes_separate_table['feature '+str(f)] + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Bayes" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = np.int(np.ceil(math.log(prob_partition+1,2))) + Planter_config['p4 config']["prob data bits"] = np.int(np.ceil(math.log(prob_partition**(1+num_features),2))) + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']["code tbl len"] = len(Exact_Table['get total prob'].keys()) + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + prob_list = {} + for c in range(num_classes): + prob_list['c'+str(c)] = np.zeros(num_features).tolist() + + for f in range(num_features): + for c in range(num_classes): + prob_list['c' + str(c)][f] = Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] + + class_prob = np.zeros(num_classes).tolist() + for c in range(num_classes): + match_or_not = False + for index in Exact_Table['get total prob']: + if prob_list['c' + str(c)]==Exact_Table['get total prob'][index]['product_list']: + class_prob[c] = Exact_Table['get total prob'][index]['class'+str(c)] + match_or_not = True + break + if not match_or_not: + print('Get total prob table not matched') + switch_prediction = class_prob.index(np.max(class_prob)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i+1], test_y[:i+1]), accuracy_score(sklearn_test_y[:i+1], test_y[:i+1])), + end="") + + + print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) + result = classification_report(test_y, switch_test_y, digits=4) print('\n', result) \ No newline at end of file diff --git a/src/models/Bayes/Type_3/dedicated_p4.py b/src/models/Bayes/Type_3/dedicated_p4.py index ec2a66f..8815eba 100755 --- a/src/models/Bayes/Type_3/dedicated_p4.py +++ b/src/models/Bayes/Type_3/dedicated_p4.py @@ -1,360 +1,360 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import copy -import math - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - for f in range(config['num_features']): - for c in range(config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)+";\n") - - write_addition_meta(config, headers, 1) - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.f" + str(f) + "c" + str(c)+" = f"+str(f)+"c"+str(c) +";\n") - - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_class_prob(") - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) - - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table class_prob {\n" - " key = {meta.flag:exact;}\n" - " actions={read_class_prob; NoAction;}\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - ingress.write(" action add_layer0(){\n") - write_layer1(config, ingress) - ingress.write(" }\n\n") - - write_addition_operation(config, ingress, 1) - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) - +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"meta.result = "+str(np.int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" - +(config['num_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def write_layer1(config, txt_file): - for c in range(0, config['num_classes']): - com = 0 - rest_component = config['num_features'] - for r in range(rest_component): - if 2 * com + 1 < config['num_features']: - f_n = 2 * com - txt_file.write(" meta.layer1_com" + str(com) + "_class" + str(c) + - " = meta.f" + str(f_n) + "c" + str(c) + " + meta.f" + str(f_n + 1) + "c" + str( - c) + " ;\n") - com += 1 - elif config['num_features']%2 == 1: - f_n = 2 * com - txt_file.write(" meta.layer1_com" + str(com) + "_class" + str(c) + - " = meta.f" + str(f_n) + "c" + str(c) + " + meta.prob_c" + str(c) + " ;\n") - com += 1 - break - elif config['num_features']%2 == 0: - f_n = 2 * com - txt_file.write(" meta.layer1_com" + str(com) + "_class" + str(c) + - " = meta.prob_c" + str(c) + " ;\n") - com += 1 - break - - -def write_addition_meta(config, txt_file, num_bias): - for c in range(0, config['num_classes']): - rest_component = config['num_features'] + num_bias - layer = 0 - while True: - layer += 1 - if rest_component == 2: - break - else: - rest_component = np.int(np.ceil(rest_component / 2)) - for com in range(rest_component): - txt_file.write( " bit<" + str(config['num_bits']) + "> layer" + str(layer) + "_com" + str(com) + "_class" + str(c) + ";\n") - - - -def write_addition_operation(config, txt_file, num_bias): - num_layer = np.int(np.ceil(math.log( config['num_features']+1,2))) - for current_layer in range(1, num_layer): - txt_file.write(" action add_layer" + str(current_layer) + "(){\n") - for c in range(0, config['num_classes']): - rest_component = config['num_features'] + num_bias - rest_component = np.int(np.ceil(rest_component / 2)) - layer = 0 - while True: - layer += 1 - - if rest_component == 2: - if layer == current_layer: - txt_file.write(" meta.prob_c" + str(c) + "= meta.layer" + str(layer) + "_com0_class" + str(c) +" + meta.layer" + str(layer) + "_com1_class" + str(c)+" ;\n") - - break - else: - last_component = copy.deepcopy(rest_component) - rest_component = np.int(np.ceil(rest_component / 2)) - for com in range(rest_component): - if layer == current_layer: - if com*2+1>(last_component-1): - txt_file.write( " meta.layer" + str(layer+1) + "_com" + str(com) + "_class" + str(c) + - " = meta.layer" + str(layer) + "_com" + str(com * 2) + "_class" + str(c) + ";\n") - else: - txt_file.write( " meta.layer" + str(layer+1) + "_com" + str(com) + "_class" + str(c) + - " = meta.layer" + str(layer) + "_com" + str(com * 2) + "_class" + str(c) + - " + meta.layer" + str(layer) + "_com" + str(com * 2 + 1) + "_class" + str(c) + ";\n") - txt_file.write(" }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" class_prob.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - num_layer = np.int(np.ceil(math.log( config['num_features']+1,2))) - for current_layer in range(num_layer ): - ingress.write(" add_layer" + str(current_layer) + "();\n") - - ingress.write(" compare();\n\n") - - - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - # sde_root = Planter_config['directory config']['sde'] - # tools_root = Planter_config['directory config']['tools'] - work_root = Planter_config['directory config']['work'] - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load thresh_and_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.class_prob.add_with_read_class_prob(" - "1, ") - for c in range(0, config['num_classes']): - if c == 0: - tload.write("table['class prob']['class " + str(c) + "']") - else: - tload.write(", table['class prob']['class " + str(c) + "']") - - tload.write(")\n\n") - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import copy +import math + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + for f in range(config['num_features']): + for c in range(config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)+";\n") + + write_addition_meta(config, headers, 1) + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.f" + str(f) + "c" + str(c)+" = f"+str(f)+"c"+str(c) +";\n") + + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_class_prob(") + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) + + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table class_prob {\n" + " key = {meta.flag:exact;}\n" + " actions={read_class_prob; NoAction;}\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + ingress.write(" action add_layer0(){\n") + write_layer1(config, ingress) + ingress.write(" }\n\n") + + write_addition_operation(config, ingress, 1) + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) + +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"meta.result = "+str(np.int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" + +(config['num_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def write_layer1(config, txt_file): + for c in range(0, config['num_classes']): + com = 0 + rest_component = config['num_features'] + for r in range(rest_component): + if 2 * com + 1 < config['num_features']: + f_n = 2 * com + txt_file.write(" meta.layer1_com" + str(com) + "_class" + str(c) + + " = meta.f" + str(f_n) + "c" + str(c) + " + meta.f" + str(f_n + 1) + "c" + str( + c) + " ;\n") + com += 1 + elif config['num_features']%2 == 1: + f_n = 2 * com + txt_file.write(" meta.layer1_com" + str(com) + "_class" + str(c) + + " = meta.f" + str(f_n) + "c" + str(c) + " + meta.prob_c" + str(c) + " ;\n") + com += 1 + break + elif config['num_features']%2 == 0: + f_n = 2 * com + txt_file.write(" meta.layer1_com" + str(com) + "_class" + str(c) + + " = meta.prob_c" + str(c) + " ;\n") + com += 1 + break + + +def write_addition_meta(config, txt_file, num_bias): + for c in range(0, config['num_classes']): + rest_component = config['num_features'] + num_bias + layer = 0 + while True: + layer += 1 + if rest_component == 2: + break + else: + rest_component = np.int(np.ceil(rest_component / 2)) + for com in range(rest_component): + txt_file.write( " bit<" + str(config['num_bits']) + "> layer" + str(layer) + "_com" + str(com) + "_class" + str(c) + ";\n") + + + +def write_addition_operation(config, txt_file, num_bias): + num_layer = np.int(np.ceil(math.log( config['num_features']+1,2))) + for current_layer in range(1, num_layer): + txt_file.write(" action add_layer" + str(current_layer) + "(){\n") + for c in range(0, config['num_classes']): + rest_component = config['num_features'] + num_bias + rest_component = np.int(np.ceil(rest_component / 2)) + layer = 0 + while True: + layer += 1 + + if rest_component == 2: + if layer == current_layer: + txt_file.write(" meta.prob_c" + str(c) + "= meta.layer" + str(layer) + "_com0_class" + str(c) +" + meta.layer" + str(layer) + "_com1_class" + str(c)+" ;\n") + + break + else: + last_component = copy.deepcopy(rest_component) + rest_component = np.int(np.ceil(rest_component / 2)) + for com in range(rest_component): + if layer == current_layer: + if com*2+1>(last_component-1): + txt_file.write( " meta.layer" + str(layer+1) + "_com" + str(com) + "_class" + str(c) + + " = meta.layer" + str(layer) + "_com" + str(com * 2) + "_class" + str(c) + ";\n") + else: + txt_file.write( " meta.layer" + str(layer+1) + "_com" + str(com) + "_class" + str(c) + + " = meta.layer" + str(layer) + "_com" + str(com * 2) + "_class" + str(c) + + " + meta.layer" + str(layer) + "_com" + str(com * 2 + 1) + "_class" + str(c) + ";\n") + txt_file.write(" }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" class_prob.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + num_layer = np.int(np.ceil(math.log( config['num_features']+1,2))) + for current_layer in range(num_layer ): + ingress.write(" add_layer" + str(current_layer) + "();\n") + + ingress.write(" compare();\n\n") + + + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + # sde_root = Planter_config['directory config']['sde'] + # tools_root = Planter_config['directory config']['tools'] + work_root = Planter_config['directory config']['work'] + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load thresh_and_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.class_prob.add_with_read_class_prob(" + "1, ") + for c in range(0, config['num_classes']): + if c == 0: + tload.write("table['class prob']['class " + str(c) + "']") + else: + tload.write(", table['class prob']['class " + str(c) + "']") + + tload.write(")\n\n") + + + + + diff --git a/src/models/Bayes/Type_3/readme.md b/src/models/Bayes/Type_3/readme.md index 19d16ce..ba1e88a 100644 --- a/src/models/Bayes/Type_3/readme.md +++ b/src/models/Bayes/Type_3/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Bayes/Type_3/table_generator.py b/src/models/Bayes/Type_3/table_generator.py index 91e9f4d..c2249d9 100755 --- a/src/models/Bayes/Type_3/table_generator.py +++ b/src/models/Bayes/Type_3/table_generator.py @@ -1,257 +1,257 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -import pydotplus -import copy -import math - -import os -import sys -# import grpc -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * - -def ten_to_bin(num,count): - num = int(num) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class - part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 - part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - return part_1*np.exp(-part_2_u/part_2_l) - - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # fit - clf = GaussianNB() - clf.fit(train_X, train_y) - sklearn_y_predict = clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_parmeters={} - for c in range(num_classes): - model_parmeters['c'+str(c)] = {} - for f in range(num_features): - model_parmeters['c' + str(c)]["f"+str(f)] = {} - model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) - model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] - - value_info = {} - value_info["max"] = 0 - value_info["min"] = 0 - for f in range(num_features): - value_info["f" + str(f)] = {} - value_info["f"+str(f)]["max"] = 0 - value_info["f"+str(f)]["min"] = 0 - - Bayes_separate_table = {} - for f in range(num_features): - Bayes_separate_table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Bayes_separate_table['feature '+str(f)][inputs]={} - for c in range(num_classes): - if calculate_prob(inputs,f,c,model_parmeters)==0: - value = value_info["min"] - else: - value = math.log(calculate_prob(inputs,f,c,model_parmeters),2) - Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - Bayes_separate_table["class prob"] = {} - for c in range(num_classes): - value = clf.class_prior_[c] - Bayes_separate_table["class prob"]['class '+str(c)]= math.log(value,2) - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - scale = (2**num_bits)/(num_features+1) - Exact_Table = {} - Exact_Table['class prob'] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['class prob']['class '+str(c)] = np.int(np.round(value*scale)) - - - for f in range(num_features): - Exact_Table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Exact_Table['feature ' + str(f)][inputs] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = np.int(np.round(value*scale)) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Bayes" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - class_prob = np.zeros(num_classes).tolist() - - for c in range(num_classes): - class_prob[c] = Exact_Table['class prob']['class '+str(c)] - - - for f in range(num_features): - for c in range(num_classes): - class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] - - switch_prediction = class_prob.index(np.max(class_prob)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) - result = classification_report(test_y, switch_test_y, digits=4) - print('\n', result) - - - - - - -def resource_prediction(): - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['resource info'] = {} - - - - tcam_per_stage = 1000 - exact_per_stage = 1000 +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +import pydotplus +import copy +import math + +import os +import sys +# import grpc +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * + +def ten_to_bin(num,count): + num = int(num) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class + part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 + part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + return part_1*np.exp(-part_2_u/part_2_l) + + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # fit + clf = GaussianNB() + clf.fit(train_X, train_y) + sklearn_y_predict = clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_parmeters={} + for c in range(num_classes): + model_parmeters['c'+str(c)] = {} + for f in range(num_features): + model_parmeters['c' + str(c)]["f"+str(f)] = {} + model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) + model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] + + value_info = {} + value_info["max"] = 0 + value_info["min"] = 0 + for f in range(num_features): + value_info["f" + str(f)] = {} + value_info["f"+str(f)]["max"] = 0 + value_info["f"+str(f)]["min"] = 0 + + Bayes_separate_table = {} + for f in range(num_features): + Bayes_separate_table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Bayes_separate_table['feature '+str(f)][inputs]={} + for c in range(num_classes): + if calculate_prob(inputs,f,c,model_parmeters)==0: + value = value_info["min"] + else: + value = math.log(calculate_prob(inputs,f,c,model_parmeters),2) + Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + Bayes_separate_table["class prob"] = {} + for c in range(num_classes): + value = clf.class_prior_[c] + Bayes_separate_table["class prob"]['class '+str(c)]= math.log(value,2) + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + scale = (2**num_bits)/(num_features+1) + Exact_Table = {} + Exact_Table['class prob'] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['class prob']['class '+str(c)] = np.int(np.round(value*scale)) + + + for f in range(num_features): + Exact_Table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Exact_Table['feature ' + str(f)][inputs] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = np.int(np.round(value*scale)) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Bayes" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + class_prob = np.zeros(num_classes).tolist() + + for c in range(num_classes): + class_prob[c] = Exact_Table['class prob']['class '+str(c)] + + + for f in range(num_features): + for c in range(num_classes): + class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] + + switch_prediction = class_prob.index(np.max(class_prob)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) + result = classification_report(test_y, switch_test_y, digits=4) + print('\n', result) + + + + + + +def resource_prediction(): + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['resource info'] = {} + + + + tcam_per_stage = 1000 + exact_per_stage = 1000 print('The predicted number of stages usage in table allocation:') \ No newline at end of file diff --git a/src/models/Bayes/Type_LB/dedicated_p4.py b/src/models/Bayes/Type_LB/dedicated_p4.py index e300cb2..082d90f 100755 --- a/src/models/Bayes/Type_LB/dedicated_p4.py +++ b/src/models/Bayes/Type_LB/dedicated_p4.py @@ -1,335 +1,335 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+" = meta.prob_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_class_prob(") - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) - - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table class_prob {\n" - " key = {hdr.Planter.ver:exact;}\n" - " actions={read_class_prob; NoAction;}\n" - # " const entries = {\n" - # " 0 : "+config['thresh_and_bias'] +"};\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) - +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(np.int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" - +(config['num_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" class_prob.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" compare();\n\n") - - - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["f"+str(f)+"c"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.class_prob" - Entry["match"] = {} - Entry["match"]["hdr.Planter.ver"] = 1 - Entry["action_name"] = "SwitchIngress.read_class_prob" - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["p_c" + str(c)] = Exact_Table["class prob"]["class " + str(c)] - Table_entries += [Entry] - - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => ") - for c in range(num_classes): - file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") - file.write("\n") - file.write("\n") - - - - file.write("table_add SwitchIngress.class_prob read_class_prob 1 => ") - for c in range(num_classes): - file.write(str(Exact_Table["class prob"]["class " + str(c)])+" ") - file.write("\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load thresh_and_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.class_prob.add_with_read_class_prob(" - "1, ") - for c in range(0, config['num_classes']): - if c == 0: - tload.write("table['class prob']['class " + str(c) + "']") - else: - tload.write(", table['class prob']['class " + str(c) + "']") - - tload.write(")\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+" = meta.prob_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_class_prob(") + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) + + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table class_prob {\n" + " key = {hdr.Planter.ver:exact;}\n" + " actions={read_class_prob; NoAction;}\n" + # " const entries = {\n" + # " 0 : "+config['thresh_and_bias'] +"};\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) + +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(np.int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" + +(config['num_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" class_prob.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" compare();\n\n") + + + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["f"+str(f)+"c"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.class_prob" + Entry["match"] = {} + Entry["match"]["hdr.Planter.ver"] = 1 + Entry["action_name"] = "SwitchIngress.read_class_prob" + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["p_c" + str(c)] = Exact_Table["class prob"]["class " + str(c)] + Table_entries += [Entry] + + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => ") + for c in range(num_classes): + file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") + file.write("\n") + file.write("\n") + + + + file.write("table_add SwitchIngress.class_prob read_class_prob 1 => ") + for c in range(num_classes): + file.write(str(Exact_Table["class prob"]["class " + str(c)])+" ") + file.write("\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load thresh_and_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.class_prob.add_with_read_class_prob(" + "1, ") + for c in range(0, config['num_classes']): + if c == 0: + tload.write("table['class prob']['class " + str(c) + "']") + else: + tload.write(", table['class prob']['class " + str(c) + "']") + + tload.write(")\n\n") diff --git a/src/models/Bayes/Type_LB/readme.md b/src/models/Bayes/Type_LB/readme.md index 19d16ce..ba1e88a 100644 --- a/src/models/Bayes/Type_LB/readme.md +++ b/src/models/Bayes/Type_LB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Bayes/Type_LB/table_generator.py b/src/models/Bayes/Type_LB/table_generator.py index 0850521..4bbd304 100755 --- a/src/models/Bayes/Type_LB/table_generator.py +++ b/src/models/Bayes/Type_LB/table_generator.py @@ -1,273 +1,273 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -import pydotplus -import copy -import math - -import os -import sys -# import grpc -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * - -def ten_to_bin(num,count): - num = int(num) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class - part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 - part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) - return part_1*np.exp(-part_2_u/part_2_l) - - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # fit - clf = GaussianNB() - clf.fit(train_X, train_y) - sklearn_y_predict = clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_parmeters={} - for c in range(num_classes): - model_parmeters['c'+str(c)] = {} - for f in range(num_features): - model_parmeters['c' + str(c)]["f"+str(f)] = {} - model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) - model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] - - value_info = {} - value_info["max"] = 0 - value_info["min"] = 0 - for f in range(num_features): - value_info["f" + str(f)] = {} - value_info["f"+str(f)]["max"] = 0 - value_info["f"+str(f)]["min"] = 0 - - Bayes_separate_table = {} - for f in range(num_features): - Bayes_separate_table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Bayes_separate_table['feature '+str(f)][inputs]={} - for c in range(num_classes): - if calculate_prob(inputs,f,c,model_parmeters)==0: - value = value_info["min"] - else: - value = math.log(calculate_prob(inputs,f,c,model_parmeters),2) - Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - Bayes_separate_table["class prob"] = {} - for c in range(num_classes): - value = clf.class_prior_[c] - Bayes_separate_table["class prob"]['class '+str(c)]= math.log(value,2) - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - scale = (2**num_bits)/(num_features+1) - Exact_Table = {} - Exact_Table['class prob'] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) - # x = copy.deepcopy(int(scale * Bayes_separate_table['class prob']['class ' + str(c)])) - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['class prob']['class '+str(c)] = np.int(np.round(value*scale)) - - - for f in range(num_features): - Exact_Table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Exact_Table['feature ' + str(f)][inputs] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = np.int(np.round(value*scale)) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Bayes" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - class_prob = np.zeros(num_classes).tolist() - - for c in range(num_classes): - class_prob[c] = Exact_Table['class prob']['class '+str(c)] - - - for f in range(num_features): - for c in range(num_classes): - class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] - # print(class_prob) - switch_prediction = class_prob.index(np.max(class_prob)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.4}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - # sys.stdout.flush() - print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) - result = classification_report(test_y, switch_test_y, digits=4) - print('\n', result) - - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['resource info'] = {} - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - - logical_dependency_memory = np.zeros(num_features+1) - logical_dependency_memory[0] = num_classes * ( Planter_config['p4 config']["action data bits"] + 8) - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - for s in range(num_features): - logical_dependency_memory[s+1] = Planter_config['p4 config']["feature tbl len"][s]*(Planter_config['p4 config']["action data bits"]+32) - - Planter_config['resource info']['logical memory per dependency'] = logical_dependency_memory.astype(int) - Planter_config['resource info']['total table bits'] = np.sum(logical_dependency_memory.astype(int)) - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is updated') - - - -if __name__ == "__main__": - resource_prediction() - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +import pydotplus +import copy +import math + +import os +import sys +# import grpc +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * + +def ten_to_bin(num,count): + num = int(num) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def calculate_prob(input,feature_No, class_No, model_parmeters):#i is class + part_1 = 1 / np.sqrt(2 * np.pi * model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + part_2_u = (input - model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['mean']) ** 2 + part_2_l = 2 * (model_parmeters['c'+str(class_No)]['f'+str(feature_No)]['std'] ** 2) + return part_1*np.exp(-part_2_u/part_2_l) + + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # fit + clf = GaussianNB() + clf.fit(train_X, train_y) + sklearn_y_predict = clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_parmeters={} + for c in range(num_classes): + model_parmeters['c'+str(c)] = {} + for f in range(num_features): + model_parmeters['c' + str(c)]["f"+str(f)] = {} + model_parmeters['c' + str(c)]["f"+str(f)]['std'] = np.sqrt(clf.sigma_[c,f]) + model_parmeters['c' + str(c)]["f"+str(f)]['mean'] = clf.theta_[c,f] + + value_info = {} + value_info["max"] = 0 + value_info["min"] = 0 + for f in range(num_features): + value_info["f" + str(f)] = {} + value_info["f"+str(f)]["max"] = 0 + value_info["f"+str(f)]["min"] = 0 + + Bayes_separate_table = {} + for f in range(num_features): + Bayes_separate_table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Bayes_separate_table['feature '+str(f)][inputs]={} + for c in range(num_classes): + if calculate_prob(inputs,f,c,model_parmeters)==0: + value = value_info["min"] + else: + value = math.log(calculate_prob(inputs,f,c,model_parmeters),2) + Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + Bayes_separate_table["class prob"] = {} + for c in range(num_classes): + value = clf.class_prior_[c] + Bayes_separate_table["class prob"]['class '+str(c)]= math.log(value,2) + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + scale = (2**num_bits)/(num_features+1) + Exact_Table = {} + Exact_Table['class prob'] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) + # x = copy.deepcopy(int(scale * Bayes_separate_table['class prob']['class ' + str(c)])) + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['class prob']['class '+str(c)] = np.int(np.round(value*scale)) + + + for f in range(num_features): + Exact_Table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Exact_Table['feature ' + str(f)][inputs] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = np.int(np.round(value*scale)) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Bayes" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + class_prob = np.zeros(num_classes).tolist() + + for c in range(num_classes): + class_prob[c] = Exact_Table['class prob']['class '+str(c)] + + + for f in range(num_features): + for c in range(num_classes): + class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] + # print(class_prob) + switch_prediction = class_prob.index(np.max(class_prob)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.4}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + # sys.stdout.flush() + print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) + result = classification_report(test_y, switch_test_y, digits=4) + print('\n', result) + + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['resource info'] = {} + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + + logical_dependency_memory = np.zeros(num_features+1) + logical_dependency_memory[0] = num_classes * ( Planter_config['p4 config']["action data bits"] + 8) + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + for s in range(num_features): + logical_dependency_memory[s+1] = Planter_config['p4 config']["feature tbl len"][s]*(Planter_config['p4 config']["action data bits"]+32) + + Planter_config['resource info']['logical memory per dependency'] = logical_dependency_memory.astype(int) + Planter_config['resource info']['total table bits'] = np.sum(logical_dependency_memory.astype(int)) + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is updated') + + + +if __name__ == "__main__": + resource_prediction() + diff --git a/src/models/Bayes/Type_LB_Bernoulli/dedicated_p4.py b/src/models/Bayes/Type_LB_Bernoulli/dedicated_p4.py index 8d9abcb..9222d40 100755 --- a/src/models/Bayes/Type_LB_Bernoulli/dedicated_p4.py +++ b/src/models/Bayes/Type_LB_Bernoulli/dedicated_p4.py @@ -1,334 +1,334 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+" = meta.prob_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_class_prob(") - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) - - ingress.write("){\n") - for c in range(0, config['num_classes']): - ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table class_prob {\n" - " key = {hdr.Planter.ver:exact;}\n" - " actions={read_class_prob; NoAction;}\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) - +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(np.int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" - +(config['num_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" class_prob.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" compare();\n\n") - - - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["f"+str(f)+"c"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.class_prob" - Entry["match"] = {} - Entry["match"]["hdr.Planter.ver"] = 1 - Entry["action_name"] = "SwitchIngress.read_class_prob" - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["p_c" + str(c)] = Exact_Table["class prob"]["class " + str(c)] - Table_entries += [Entry] - - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => ") - for c in range(num_classes): - file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") - file.write("\n") - file.write("\n") - - - - file.write("table_add SwitchIngress.class_prob read_class_prob 1 => ") - for c in range(num_classes): - file.write(str(Exact_Table["class prob"]["class " + str(c)])+" ") - file.write("\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load thresh_and_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.class_prob.add_with_read_class_prob(" - "1, ") - for c in range(0, config['num_classes']): - if c == 0: - tload.write("table['class prob']['class " + str(c) + "']") - else: - tload.write(", table['class prob']['class " + str(c) + "']") - - tload.write(")\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> prob_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+" = meta.prob_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_class_prob(") + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> p_c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> p_c"+str(c)) + + ingress.write("){\n") + for c in range(0, config['num_classes']): + ingress.write(" meta.prob_c" + str(c)+ " = p_c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table class_prob {\n" + " key = {hdr.Planter.ver:exact;}\n" + " actions={read_class_prob; NoAction;}\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(np.int(compare[0])) +"_"+str(np.int(compare[1])) + +" = meta.prob_c" +str(np.int(compare[0])) +" - meta.prob_c"+str(np.int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(np.int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(np.int(compare[0]))+"_"+str(np.int(compare[1]))+"& 0b1" + +(config['num_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" class_prob.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" compare();\n\n") + + + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["f"+str(f)+"c"+str(c)] = Exact_Table['feature ' + str(f)][idx]["class "+str(c)] + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.class_prob" + Entry["match"] = {} + Entry["match"]["hdr.Planter.ver"] = 1 + Entry["action_name"] = "SwitchIngress.read_class_prob" + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["p_c" + str(c)] = Exact_Table["class prob"]["class " + str(c)] + Table_entries += [Entry] + + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => ") + for c in range(num_classes): + file.write(str(Exact_Table['feature ' + str(f)][idx]["class " + str(c)])+" ") + file.write("\n") + file.write("\n") + + + + file.write("table_add SwitchIngress.class_prob read_class_prob 1 => ") + for c in range(num_classes): + file.write(str(Exact_Table["class prob"]["class " + str(c)])+" ") + file.write("\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['class " + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['class "+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load thresh_and_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.class_prob.add_with_read_class_prob(" + "1, ") + for c in range(0, config['num_classes']): + if c == 0: + tload.write("table['class prob']['class " + str(c) + "']") + else: + tload.write(", table['class prob']['class " + str(c) + "']") + + tload.write(")\n\n") diff --git a/src/models/Bayes/Type_LB_Bernoulli/readme.md b/src/models/Bayes/Type_LB_Bernoulli/readme.md index 19d16ce..ba1e88a 100644 --- a/src/models/Bayes/Type_LB_Bernoulli/readme.md +++ b/src/models/Bayes/Type_LB_Bernoulli/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for Bayes. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/Bayes/Type_LB_Bernoulli/table_generator.py b/src/models/Bayes/Type_LB_Bernoulli/table_generator.py index 42d7d56..6910080 100755 --- a/src/models/Bayes/Type_LB_Bernoulli/table_generator.py +++ b/src/models/Bayes/Type_LB_Bernoulli/table_generator.py @@ -1,300 +1,300 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse -import time -from sklearn import metrics -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -from sklearn.naive_bayes import BernoulliNB -from sklearn.naive_bayes import MultinomialNB -from sklearn.naive_bayes import ComplementNB -import pydotplus -import copy -import math - -import os -import sys -# import grpc -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * -from src.functions.input_CLI import * - -def ten_to_bin(num,count): - num = int(num) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - # =================== set max leaf node in config =================== - question = 'Threshold value for binarization?' - default = 10 - Planter_config = take_CLI_input(Planter_config, 'model config', 'binarize threshold', question, default, - manually_input=False, numeric=True) - threshold = Planter_config['model config']['binarize threshold'] - # =================== set max leaf node in config =================== - question = 'Number of bits for each action data?' - default = 16 - Planter_config = take_CLI_input(Planter_config, 'model config', 'number of bits', question, default, - manually_input=False, numeric=True) - - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # fit - # clf = GaussianNB() - clf = BernoulliNB(alpha=0, binarize= threshold, class_prior=None, fit_prior=True) - clf.fit(train_X, train_y) - sklearn_y_predict = clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - if num_classes==2: - tn, fp, fn, tp = metrics.confusion_matrix(test_y, sklearn_y_predict).ravel() - precision, sensitivity, specificity = tp/(tp+fp), tp/(tp+fn), tn/(tn+fp) - print('\nPrecision:', precision) - print('\nSensitivity:', sensitivity) - print('\nSpecificity:', specificity) - print('\nAccuracy:', (tp+tn)/(tn+fp+fn+tp)) - print('\nF1-Score:', 2*(precision*sensitivity)/(precision+sensitivity)) - print('\nMCC:', metrics.matthews_corrcoef(test_y, sklearn_y_predict)) - print('\nBCR:', metrics.balanced_accuracy_score(test_y, sklearn_y_predict)) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_parmeters={} - for c in range(num_classes): - model_parmeters['c'+str(c)] = {} - for f in range(num_features): - model_parmeters['c' + str(c)]["f"+str(f)] = {} - model_parmeters['c' + str(c)]["f"+str(f)]['log_prob'] = clf.feature_log_prob_[c,f] - - - value_info = {} - value_info["max"] = 0 - value_info["min"] = 0 - for f in range(num_features): - value_info["f" + str(f)] = {} - value_info["f"+str(f)]["max"] = 0 - value_info["f"+str(f)]["min"] = 0 - - Bayes_separate_table = {} - for f in range(num_features): - Bayes_separate_table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Bayes_separate_table['feature '+str(f)][inputs]={} - for c in range(num_classes): - if inputs > threshold: - x = 1 - else: - x = 0 - partial_neg_prob = np.log(1- np.exp(model_parmeters['c' + str(c)]["f"+str(f)]['log_prob'])) - partial_jll = x * (model_parmeters['c' + str(c)]["f"+str(f)]['log_prob'] - partial_neg_prob) - - value = copy.deepcopy(partial_jll + partial_neg_prob) - Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - Bayes_separate_table["class prob"] = {} - for c in range(num_classes): - value = clf.class_log_prior_[c] - Bayes_separate_table["class prob"]['class '+str(c)]= value - if value > value_info["max"]: - value_info["max"] = value - if value < value_info["min"]: - value_info["min"] = value - - scale = (2**num_bits)/(num_features+1) - Exact_Table = {} - Exact_Table['class prob'] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['class prob']['class '+str(c)] = int(np.round(value*scale)) - - - - for f in range(num_features): - Exact_Table['feature '+str(f)] = {} - for inputs in range(0,feature_max[f]+1): - Exact_Table['feature ' + str(f)][inputs] = {} - for c in range(num_classes): - min_x = value_info["min"] - max_x = value_info["max"] - x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] - value = Single_MaxMinNormalization(x, min_x, max_x) - Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = int(np.round(value*scale)) - - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "Bayes" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - class_prob = np.zeros(num_classes).tolist() - - for c in range(num_classes): - class_prob[c] = Exact_Table['class prob']['class '+str(c)] - - - for f in range(num_features): - for c in range(num_classes): - class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] - # print(class_prob) - switch_prediction = class_prob.index(np.max(class_prob)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.4}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) - result = classification_report(test_y, switch_test_y, digits=4) - print('\n', result) - - - - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['resource info'] = {} - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - - logical_dependency_memory = np.zeros(num_features+1) - logical_dependency_memory[0] = num_classes * ( Planter_config['p4 config']["action data bits"] + 8) - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - for s in range(num_features): - logical_dependency_memory[s+1] = Planter_config['p4 config']["feature tbl len"][s]*(Planter_config['p4 config']["action data bits"]+32) - - Planter_config['resource info']['logical memory per dependency'] = logical_dependency_memory.astype(int) - Planter_config['resource info']['total table bits'] = np.sum(logical_dependency_memory.astype(int)) - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is updated') - - - -if __name__ == "__main__": - resource_prediction() +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse +import time +from sklearn import metrics +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +from sklearn.naive_bayes import BernoulliNB +from sklearn.naive_bayes import MultinomialNB +from sklearn.naive_bayes import ComplementNB +import pydotplus +import copy +import math + +import os +import sys +# import grpc +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * +from src.functions.input_CLI import * + +def ten_to_bin(num,count): + num = int(num) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + # =================== set max leaf node in config =================== + question = 'Threshold value for binarization?' + default = 10 + Planter_config = take_CLI_input(Planter_config, 'model config', 'binarize threshold', question, default, + manually_input=False, numeric=True) + threshold = Planter_config['model config']['binarize threshold'] + # =================== set max leaf node in config =================== + question = 'Number of bits for each action data?' + default = 16 + Planter_config = take_CLI_input(Planter_config, 'model config', 'number of bits', question, default, + manually_input=False, numeric=True) + + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # fit + # clf = GaussianNB() + clf = BernoulliNB(alpha=0, binarize= threshold, class_prior=None, fit_prior=True) + clf.fit(train_X, train_y) + sklearn_y_predict = clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + if num_classes==2: + tn, fp, fn, tp = metrics.confusion_matrix(test_y, sklearn_y_predict).ravel() + precision, sensitivity, specificity = tp/(tp+fp), tp/(tp+fn), tn/(tn+fp) + print('\nPrecision:', precision) + print('\nSensitivity:', sensitivity) + print('\nSpecificity:', specificity) + print('\nAccuracy:', (tp+tn)/(tn+fp+fn+tp)) + print('\nF1-Score:', 2*(precision*sensitivity)/(precision+sensitivity)) + print('\nMCC:', metrics.matthews_corrcoef(test_y, sklearn_y_predict)) + print('\nBCR:', metrics.balanced_accuracy_score(test_y, sklearn_y_predict)) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_parmeters={} + for c in range(num_classes): + model_parmeters['c'+str(c)] = {} + for f in range(num_features): + model_parmeters['c' + str(c)]["f"+str(f)] = {} + model_parmeters['c' + str(c)]["f"+str(f)]['log_prob'] = clf.feature_log_prob_[c,f] + + + value_info = {} + value_info["max"] = 0 + value_info["min"] = 0 + for f in range(num_features): + value_info["f" + str(f)] = {} + value_info["f"+str(f)]["max"] = 0 + value_info["f"+str(f)]["min"] = 0 + + Bayes_separate_table = {} + for f in range(num_features): + Bayes_separate_table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Bayes_separate_table['feature '+str(f)][inputs]={} + for c in range(num_classes): + if inputs > threshold: + x = 1 + else: + x = 0 + partial_neg_prob = np.log(1- np.exp(model_parmeters['c' + str(c)]["f"+str(f)]['log_prob'])) + partial_jll = x * (model_parmeters['c' + str(c)]["f"+str(f)]['log_prob'] - partial_neg_prob) + + value = copy.deepcopy(partial_jll + partial_neg_prob) + Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] = value + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + Bayes_separate_table["class prob"] = {} + for c in range(num_classes): + value = clf.class_log_prior_[c] + Bayes_separate_table["class prob"]['class '+str(c)]= value + if value > value_info["max"]: + value_info["max"] = value + if value < value_info["min"]: + value_info["min"] = value + + scale = (2**num_bits)/(num_features+1) + Exact_Table = {} + Exact_Table['class prob'] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = copy.deepcopy(Bayes_separate_table['class prob']['class '+str(c)]) + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['class prob']['class '+str(c)] = int(np.round(value*scale)) + + + + for f in range(num_features): + Exact_Table['feature '+str(f)] = {} + for inputs in range(0,feature_max[f]+1): + Exact_Table['feature ' + str(f)][inputs] = {} + for c in range(num_classes): + min_x = value_info["min"] + max_x = value_info["max"] + x = Bayes_separate_table['feature '+str(f)][inputs]["class "+str(c)] + value = Single_MaxMinNormalization(x, min_x, max_x) + Exact_Table['feature '+str(f)][inputs]["class "+str(c)] = int(np.round(value*scale)) + + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "Bayes" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + class_prob = np.zeros(num_classes).tolist() + + for c in range(num_classes): + class_prob[c] = Exact_Table['class prob']['class '+str(c)] + + + for f in range(num_features): + for c in range(num_classes): + class_prob[c] += Exact_Table['feature '+str(f)][str(input_feature_value[f])]['class '+str(c)] + # print(class_prob) + switch_prediction = class_prob.index(np.max(class_prob)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.4}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Bayes is', correct / np.shape(test_X.values)[0]) + result = classification_report(test_y, switch_test_y, digits=4) + print('\n', result) + + + + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['resource info'] = {} + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + + logical_dependency_memory = np.zeros(num_features+1) + logical_dependency_memory[0] = num_classes * ( Planter_config['p4 config']["action data bits"] + 8) + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + for s in range(num_features): + logical_dependency_memory[s+1] = Planter_config['p4 config']["feature tbl len"][s]*(Planter_config['p4 config']["action data bits"]+32) + + Planter_config['resource info']['logical memory per dependency'] = logical_dependency_memory.astype(int) + Planter_config['resource info']['total table bits'] = np.sum(logical_dependency_memory.astype(int)) + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is updated') + + + +if __name__ == "__main__": + resource_prediction() diff --git a/src/models/Bayes/readme.md b/src/models/Bayes/readme.md index f2c8f68..db4db6d 100644 --- a/src/models/Bayes/readme.md +++ b/src/models/Bayes/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported naïve Bayes. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported naïve Bayes. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_1/dedicated_p4.py b/src/models/DT/Type_1/dedicated_p4.py index 6ea6295..a864668 100755 --- a/src/models/DT/Type_1/dedicated_p4.py +++ b/src/models/DT/Type_1/dedicated_p4.py @@ -1,265 +1,265 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['code_tbl_depth'] = config_file['code table size'] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - headers.write(" bit<32> DstAddr;\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( - int(np.array(config['code_width'])[i])) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " meta.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":exact; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - ingress.write(" table decision {\n" \ - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][j] - ingress.write("}\n") - ingress.write(" actions={read_lable;}\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" \ - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - Entry["action_params"]["tree"] = int(Exact_Table['feature '+str(f)][idx]) - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Exact_Table['code to vote'][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Exact_Table['code to vote'][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][f] - - - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - label = Exact_Table['feature ' + str(f)][idx] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - file.write("table_add SwitchIngress.decision read_lable ") - for f in range(num_features): - file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write(" int(table['code to vote'][key]['leaf']))\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['code_tbl_depth'] = config_file['code table size'] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + headers.write(" bit<32> DstAddr;\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( + int(np.array(config['code_width'])[i])) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " meta.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":exact; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + ingress.write(" table decision {\n" \ + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][j] + ingress.write("}\n") + ingress.write(" actions={read_lable;}\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" \ + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + Entry["action_params"]["tree"] = int(Exact_Table['feature '+str(f)][idx]) + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Exact_Table['code to vote'][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Exact_Table['code to vote'][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][f] + + + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + label = Exact_Table['feature ' + str(f)][idx] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + file.write("table_add SwitchIngress.decision read_lable ") + for f in range(num_features): + file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write(" int(table['code to vote'][key]['leaf']))\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") + diff --git a/src/models/DT/Type_1/readme.md b/src/models/DT/Type_1/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_1/readme.md +++ b/src/models/DT/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_1/table_generator.py b/src/models/DT/Type_1/table_generator.py index 5cc6649..5a9f43a 100755 --- a/src/models/DT/Type_1/table_generator.py +++ b/src/models/DT/Type_1/table_generator.py @@ -1,456 +1,456 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - Ternary_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) - - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - - Planter_config['p4 config']["code table size"] = len(Exact_Table['code to vote'].keys()) - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] - if not match_or_not: - print('feature table not matched') - if str(code_list) != str(ternary_code_list): - print('error in exact to ternary match', code_list, ternary_code_list) - - - for key in Exact_Table['code to vote']: - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) - match_or_not = True - break - if not match_or_not: - print('tree(code/code to vote) table not matched') - - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + Ternary_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) + + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + + Planter_config['p4 config']["code table size"] = len(Exact_Table['code to vote'].keys()) + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] + if not match_or_not: + print('feature table not matched') + if str(code_list) != str(ternary_code_list): + print('error in exact to ternary match', code_list, ternary_code_list) + + + for key in Exact_Table['code to vote']: + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) + match_or_not = True + break + if not match_or_not: + print('tree(code/code to vote) table not matched') + + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/DT/Type_1_xsa/dedicated_p4.py b/src/models/DT/Type_1_xsa/dedicated_p4.py index 645df30..3f0e978 100755 --- a/src/models/DT/Type_1_xsa/dedicated_p4.py +++ b/src/models/DT/Type_1_xsa/dedicated_p4.py @@ -1,344 +1,344 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - # config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - # config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config, xsa_architecture=False): - # this should not be called for xsa_architecture - if xsa_architecture: - return - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - # for t in range(config['num_trees']): - # headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - # for t in range(config['num_trees']): - # headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_variables(fname, config, min_code_width=10): - with open(fname, 'a') as processing: - for i in range(0, config['num_features']): - processing.write( - " bit<" + str(max(int(config['code_width'][i]), min_code_width)) + "> code_f" + str(i) + ";\n") - processing.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") # what is this used for?? - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config, xsa_architecture=False): - # vitis P4 compiler requires minimum key width of 10 bits - if xsa_architecture: - min_code_width = 10 - else: - min_code_width = 0 - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - max(int(np.array(config['code_width'])[i]), min_code_width)) + "> meta_code, bit<" + str( - max(int(np.array(config['code_width'])[i]), min_code_width)) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n") - - if xsa_architecture: - ingress.write(" hdr.Planter.result = label;\n") - else: - ingress.write(" meta.result = label;\n") - - ingress.write(" }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n") - - if xsa_architecture: - ingress.write(" key = { hdr.Planter.feature" + str(i) + ":exact; }\n") - else: - ingress.write(" key = { meta.feature" + str(i) + ":exact; }\n") - - ingress.write(" actions = {\n") - - if xsa_architecture: - ingress.write(" extract_feature" + str(i) + "(code_f" + str(i) + ");\n") - else: - - ingress.write(" extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n") - - ingress.write(" NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - # not entirely sure how this works?? - ingress.write(" table decision {\n" \ - " key = {\n") - for j in range(0, config['num_features']): - if xsa_architecture: - ingress.write(" code_f" + str(j) + ":exact;\n") - else: - ingress.write( - " meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") # not sure why indexing is necessary here - count_code[j] += config['code_width'][j] # why is this necessary?? - ingress.write(" }\n") - ingress.write(" actions={read_lable;}\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" \ - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - Entry["action_params"]["tree"] = int(Exact_Table['feature '+str(f)][idx]) - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Exact_Table['code to vote'][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Exact_Table['code to vote'][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][f] - - - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - label = Exact_Table['feature ' + str(f)][idx] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - file.write("table_add SwitchIngress.decision read_lable ") - for f in range(num_features): - file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") - - -def create_tables_Commend_esnet(fname, config): - # commands to load table entries using esnet-smartnic-fw - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - label = Exact_Table['feature ' + str(f)][idx] - file.write("table_add lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - file.write("table_add decision read_lable ") - for f in range(num_features): - file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") - -def edit_tables_command_esnet_software(fname): - with open(fname, 'a') as file: - file.write( - "# run traffic\n" - "run_traffic packets\n\n" - "# end\n" - "exit\n") - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file_esnet_hardware = work_root + "/src/targets/alveo_u280/hardware/s1-commands.txt" - create_tables_Commend_esnet(commend_file_esnet_hardware, Planter_config) - - commend_file_esnet_software = work_root + "/src/targets/alveo_u280/behavioral/test_environment/sim/test-case0/runsim.txt" - create_tables_Commend_esnet(commend_file_esnet_software, Planter_config) - edit_tables_command_esnet_software(commend_file_esnet_software) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write(" int(table['code to vote'][key]['leaf']))\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + # config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + # config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config, xsa_architecture=False): + # this should not be called for xsa_architecture + if xsa_architecture: + return + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + # for t in range(config['num_trees']): + # headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + # for t in range(config['num_trees']): + # headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_variables(fname, config, min_code_width=10): + with open(fname, 'a') as processing: + for i in range(0, config['num_features']): + processing.write( + " bit<" + str(max(int(config['code_width'][i]), min_code_width)) + "> code_f" + str(i) + ";\n") + processing.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") # what is this used for?? + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config, xsa_architecture=False): + # vitis P4 compiler requires minimum key width of 10 bits + if xsa_architecture: + min_code_width = 10 + else: + min_code_width = 0 + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + max(int(np.array(config['code_width'])[i]), min_code_width)) + "> meta_code, bit<" + str( + max(int(np.array(config['code_width'])[i]), min_code_width)) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n") + + if xsa_architecture: + ingress.write(" hdr.Planter.result = label;\n") + else: + ingress.write(" meta.result = label;\n") + + ingress.write(" }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n") + + if xsa_architecture: + ingress.write(" key = { hdr.Planter.feature" + str(i) + ":exact; }\n") + else: + ingress.write(" key = { meta.feature" + str(i) + ":exact; }\n") + + ingress.write(" actions = {\n") + + if xsa_architecture: + ingress.write(" extract_feature" + str(i) + "(code_f" + str(i) + ");\n") + else: + + ingress.write(" extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n") + + ingress.write(" NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + # not entirely sure how this works?? + ingress.write(" table decision {\n" \ + " key = {\n") + for j in range(0, config['num_features']): + if xsa_architecture: + ingress.write(" code_f" + str(j) + ":exact;\n") + else: + ingress.write( + " meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") # not sure why indexing is necessary here + count_code[j] += config['code_width'][j] # why is this necessary?? + ingress.write(" }\n") + ingress.write(" actions={read_lable;}\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" \ + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + Entry["action_params"]["tree"] = int(Exact_Table['feature '+str(f)][idx]) + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Exact_Table['code to vote'][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Exact_Table['code to vote'][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][f] + + + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + label = Exact_Table['feature ' + str(f)][idx] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + file.write("table_add SwitchIngress.decision read_lable ") + for f in range(num_features): + file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") + + +def create_tables_Commend_esnet(fname, config): + # commands to load table entries using esnet-smartnic-fw + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + label = Exact_Table['feature ' + str(f)][idx] + file.write("table_add lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + file.write("table_add decision read_lable ") + for f in range(num_features): + file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") + +def edit_tables_command_esnet_software(fname): + with open(fname, 'a') as file: + file.write( + "# run traffic\n" + "run_traffic packets\n\n" + "# end\n" + "exit\n") + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file_esnet_hardware = work_root + "/src/targets/alveo_u280/hardware/s1-commands.txt" + create_tables_Commend_esnet(commend_file_esnet_hardware, Planter_config) + + commend_file_esnet_software = work_root + "/src/targets/alveo_u280/behavioral/test_environment/sim/test-case0/runsim.txt" + create_tables_Commend_esnet(commend_file_esnet_software, Planter_config) + edit_tables_command_esnet_software(commend_file_esnet_software) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write(" int(table['code to vote'][key]['leaf']))\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") + diff --git a/src/models/DT/Type_1_xsa/readme.md b/src/models/DT/Type_1_xsa/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_1_xsa/readme.md +++ b/src/models/DT/Type_1_xsa/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_1_xsa/table_generator.py b/src/models/DT/Type_1_xsa/table_generator.py index 3723ac1..037e2f1 100755 --- a/src/models/DT/Type_1_xsa/table_generator.py +++ b/src/models/DT/Type_1_xsa/table_generator.py @@ -1,454 +1,454 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - Ternary_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) - - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - - Planter_config['p4 config']["code table size"] = len(Exact_Table['code to vote'].keys()) - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] - if not match_or_not: - print('feature table not matched') - if str(code_list) != str(ternary_code_list): - print('error in exact to ternary match', code_list, ternary_code_list) - - - for key in Exact_Table['code to vote']: - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) - match_or_not = True - break - if not match_or_not: - print('tree(code/code to vote) table not matched') - - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + Ternary_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) + + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + + Planter_config['p4 config']["code table size"] = len(Exact_Table['code to vote'].keys()) + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] + if not match_or_not: + print('feature table not matched') + if str(code_list) != str(ternary_code_list): + print('error in exact to ternary match', code_list, ternary_code_list) + + + for key in Exact_Table['code to vote']: + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) + match_or_not = True + break + if not match_or_not: + print('tree(code/code to vote) table not matched') + + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/DT/Type_2/dedicated_p4.py b/src/models/DT/Type_2/dedicated_p4.py index ba63160..b6b99ed 100755 --- a/src/models/DT/Type_2/dedicated_p4.py +++ b/src/models/DT/Type_2/dedicated_p4.py @@ -1,276 +1,276 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - # config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - # config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default_lable'] = config_file["default lable"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - headers.write(" bit<32> DstAddr;\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( - int(np.array(config['code_width'])[i])) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " meta.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":exact; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - ingress.write(" action write_default_class() {\n" - " meta.result = " + str(config['default_lable']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - ingress.write(" table decision {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_class;\n" - " }\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" - " default_action = write_default_class;\n" - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - Entry["action_params"]["tree"] = int(Exact_Table['feature '+str(f)][idx]) - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Exact_Table['code to vote'][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Exact_Table['code to vote'][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][f] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - label = Exact_Table['feature ' + str(f)][idx] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for idx in Exact_Table['code to vote']: - key_value = int(idx) - file.write("table_add SwitchIngress.decision read_lable ") - for f in range(num_features): - file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write(" int(table['code to vote'][key]['leaf']))\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + # config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + # config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default_lable'] = config_file["default lable"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + headers.write(" bit<32> DstAddr;\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( + int(np.array(config['code_width'])[i])) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " meta.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":exact; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + ingress.write(" action write_default_class() {\n" + " meta.result = " + str(config['default_lable']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + ingress.write(" table decision {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_class;\n" + " }\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" + " default_action = write_default_class;\n" + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + Entry["action_params"]["tree"] = int(Exact_Table['feature '+str(f)][idx]) + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Exact_Table['code to vote'][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Exact_Table['code to vote'][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][f] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + label = Exact_Table['feature ' + str(f)][idx] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for idx in Exact_Table['code to vote']: + key_value = int(idx) + file.write("table_add SwitchIngress.decision read_lable ") + for f in range(num_features): + file.write(str(Exact_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(Exact_Table['code to vote'][idx]['leaf'])+"\n") + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write(" int(table['code to vote'][key]['leaf']))\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") + diff --git a/src/models/DT/Type_2/readme.md b/src/models/DT/Type_2/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_2/readme.md +++ b/src/models/DT/Type_2/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_2/table_generator.py b/src/models/DT/Type_2/table_generator.py index 3ed819e..611b8dc 100755 --- a/src/models/DT/Type_2/table_generator.py +++ b/src/models/DT/Type_2/table_generator.py @@ -1,467 +1,467 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - Ternary_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) - - - collect_votes = [] - Ternary_Table['code to vote'] = {} - for idx in Exact_Table['code to vote']: - collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] - code_table_size = 0 - default_label = max(collect_votes , key = collect_votes.count) - for idx in Exact_Table['code to vote']: - if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: - Ternary_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] - code_table_size += 1 - - Exact_Table['code to vote'] = copy.deepcopy(Ternary_Table['code to vote']) - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - - Planter_config['p4 config']["code table size"] = code_table_size - Planter_config['p4 config']["default lable"] = default_label - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] - if not match_or_not: - print('feature table not matched') - if str(code_list) != str(ternary_code_list): - print('error in exact to ternary match', code_list, ternary_code_list) - - - for key in Exact_Table['code to vote']: - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) - match_or_not = True - break - if not match_or_not: - - switch_prediction = Planter_config['p4 config']["default lable"] - - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + Ternary_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) + + + collect_votes = [] + Ternary_Table['code to vote'] = {} + for idx in Exact_Table['code to vote']: + collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] + code_table_size = 0 + default_label = max(collect_votes , key = collect_votes.count) + for idx in Exact_Table['code to vote']: + if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: + Ternary_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] + code_table_size += 1 + + Exact_Table['code to vote'] = copy.deepcopy(Ternary_Table['code to vote']) + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + + Planter_config['p4 config']["code table size"] = code_table_size + Planter_config['p4 config']["default lable"] = default_label + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] + if not match_or_not: + print('feature table not matched') + if str(code_list) != str(ternary_code_list): + print('error in exact to ternary match', code_list, ternary_code_list) + + + for key in Exact_Table['code to vote']: + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) + match_or_not = True + break + if not match_or_not: + + switch_prediction = Planter_config['p4 config']["default lable"] + + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/DT/Type_3/dedicated_p4.py b/src/models/DT/Type_3/dedicated_p4.py index cc97397..d778c4f 100755 --- a/src/models/DT/Type_3/dedicated_p4.py +++ b/src/models/DT/Type_3/dedicated_p4.py @@ -1,308 +1,308 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - # config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - # config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default_lable'] = config_file["default lable"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - # for t in range(config['num_trees']): - # headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - # for t in range(config['num_trees']): - # headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( - int(np.array(config['code_width'])[i])) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " meta.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - ingress.write(" action write_default_class() {\n" - " meta.result = " + str(config['default_lable']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - ingress.write(" table decision {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_class;\n" - " }\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" - " default_action = write_default_class;\n" - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables(Planter_config): - # change this in topology.json to activate: "s1": {"runtime_json": "s1-runtime.json"} - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - label = Ternary_Table['feature ' + str(f)][idx][2] - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature" + str(f)] = {} - Entry["match"]["meta.feature" + str(f)] = [key, mask] - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - Entry["action_params"]["tree"] = int(label) - Entry["priority"] = priority - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - - for idx in Ternary_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Ternary_Table['code to vote'][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Ternary_Table['code to vote'][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][f] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - label = Ternary_Table['feature ' + str(f)][idx][2] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - for idx in Ternary_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - file.write("table_add SwitchIngress.decision read_lable ") - for f in range(num_features): - file.write(str(Ternary_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(Ternary_Table['code to vote'][idx]['leaf'])+"\n") - - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - # Load feature tables - # for i in range(0, config['num_features']): - # tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( - # i) + "'].keys()),'entries')\n" \ - # "for key in table['feature " + str(i) + "']:\n") - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(table['feature " + str(i) + "'][key][1], table['feature " + str(i) + "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write(" int(table['code to vote'][key]['leaf']))\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + # config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + # config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default_lable'] = config_file["default lable"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + # for t in range(config['num_trees']): + # headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + # for t in range(config['num_trees']): + # headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( + int(np.array(config['code_width'])[i])) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " meta.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + ingress.write(" action write_default_class() {\n" + " meta.result = " + str(config['default_lable']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + ingress.write(" table decision {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_class;\n" + " }\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" + " default_action = write_default_class;\n" + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables(Planter_config): + # change this in topology.json to activate: "s1": {"runtime_json": "s1-runtime.json"} + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + label = Ternary_Table['feature ' + str(f)][idx][2] + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature" + str(f)] = {} + Entry["match"]["meta.feature" + str(f)] = [key, mask] + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + Entry["action_params"]["tree"] = int(label) + Entry["priority"] = priority + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + + for idx in Ternary_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Ternary_Table['code to vote'][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Ternary_Table['code to vote'][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][f] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + label = Ternary_Table['feature ' + str(f)][idx][2] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + for idx in Ternary_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + file.write("table_add SwitchIngress.decision read_lable ") + for f in range(num_features): + file.write(str(Ternary_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(Ternary_Table['code to vote'][idx]['leaf'])+"\n") + + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + # Load feature tables + # for i in range(0, config['num_features']): + # tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( + # i) + "'].keys()),'entries')\n" \ + # "for key in table['feature " + str(i) + "']:\n") + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(table['feature " + str(i) + "'][key][1], table['feature " + str(i) + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write(" int(table['code to vote'][key]['leaf']))\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") + diff --git a/src/models/DT/Type_3/readme.md b/src/models/DT/Type_3/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_3/readme.md +++ b/src/models/DT/Type_3/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_3/table_generator.py b/src/models/DT/Type_3/table_generator.py index 371bd0a..2c50a57 100755 --- a/src/models/DT/Type_3/table_generator.py +++ b/src/models/DT/Type_3/table_generator.py @@ -1,504 +1,504 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -import time -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l == 0: - text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - else: - text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - Ternary_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) - - - # prepare default - collect_votes = [] - Ternary_Table['code to vote'] = {} - for idx in Exact_Table['code to vote']: - collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] - code_table_size = 0 - default_label = max(collect_votes , key = collect_votes.count) - for idx in Exact_Table['code to vote']: - if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: - Ternary_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] - code_table_size += 1 - Exact_Table['code to vote'] = copy.deepcopy(Ternary_Table['code to vote']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - - Planter_config['p4 config']["code table size"] = code_table_size - Planter_config['p4 config']["default lable"] = default_label - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - if int(input_feature_value[f]) & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(int(input_feature_value[f]))] - if not match_or_not: - print('feature table not matched') - if str(code_list) != str(ternary_code_list): - print('error in exact to ternary match', code_list, ternary_code_list) - - - for key in Exact_Table['code to vote']: - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) - match_or_not = True - break - if not match_or_not: - - switch_prediction = Planter_config['p4 config']["default lable"] - - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"]) ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +import time +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l == 0: + text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + else: + text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + Ternary_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + Ternary_Table['feature ' + str(f)] = Table_to_TCAM(Ternary_Table['feature ' + str(f)], feature_width[f]) + + + # prepare default + collect_votes = [] + Ternary_Table['code to vote'] = {} + for idx in Exact_Table['code to vote']: + collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] + code_table_size = 0 + default_label = max(collect_votes , key = collect_votes.count) + for idx in Exact_Table['code to vote']: + if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: + Ternary_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] + code_table_size += 1 + Exact_Table['code to vote'] = copy.deepcopy(Ternary_Table['code to vote']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + + Planter_config['p4 config']["code table size"] = code_table_size + Planter_config['p4 config']["default lable"] = default_label + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + if int(input_feature_value[f]) & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(int(input_feature_value[f]))] + if not match_or_not: + print('feature table not matched') + if str(code_list) != str(ternary_code_list): + print('error in exact to ternary match', code_list, ternary_code_list) + + + for key in Exact_Table['code to vote']: + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) + match_or_not = True + break + if not match_or_not: + + switch_prediction = Planter_config['p4 config']["default lable"] + + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"]) ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/DT/Type_4/dedicated_p4.py b/src/models/DT/Type_4/dedicated_p4.py index 72b8c5a..c6d8469 100755 --- a/src/models/DT/Type_4/dedicated_p4.py +++ b/src/models/DT/Type_4/dedicated_p4.py @@ -1,309 +1,309 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_width'] = config_file["width of feature"] - config['feature_table_depth'] = config_file['used columns'] - # config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - # config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default_lable'] = config_file["default lable"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - # for t in range(config['num_trees']): - # headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - # for t in range(config['num_trees']): - # headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( - int(np.array(config['code_width'])[i])) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " meta.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - ingress.write(" action write_default_class() {\n" - " meta.result = " + str(config['default_lable']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - ingress.write(" table decision {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_class;\n" - " }\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" - " default_action = write_default_class;\n" - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables(Planter_config): - # change this in topology.json to activate: "s1": {"runtime_json": "s1-runtime.json"} - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - label = LPM_Table['feature ' + str(f)][idx][2] - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature" + str(f)] = {} - Entry["match"]["meta.feature" + str(f)] = [key, mask] - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - Entry["action_params"]["tree"] = int(label) - Entry["priority"] = priority - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - - for idx in LPM_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(LPM_Table['code to vote'][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(LPM_Table['code to vote'][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][f] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - label = LPM_Table['feature ' + str(f)][idx][2] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+ - "."+str(((1<<8)-1)&(key))+"/"+str(32- int(math.log(2**config['p4 config']["width of feature"][f] - mask,2)))+" => "+str(label)+" \n") - - file.write("\n") - - for idx in LPM_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - file.write("table_add SwitchIngress.decision read_lable ") - for f in range(num_features): - file.write(str(LPM_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(LPM_Table['code to vote'][idx]['leaf'])+"\n") - - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write(" int(table['code to vote'][key]['leaf']))\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.dump()" + "\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_width'] = config_file["width of feature"] + config['feature_table_depth'] = config_file['used columns'] + # config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + # config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default_lable'] = config_file["default lable"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + # for t in range(config['num_trees']): + # headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + # for t in range(config['num_trees']): + # headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( + int(np.array(config['code_width'])[i])) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " meta.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + ingress.write(" action write_default_class() {\n" + " meta.result = " + str(config['default_lable']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + ingress.write(" table decision {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_class;\n" + " }\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" + " default_action = write_default_class;\n" + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables(Planter_config): + # change this in topology.json to activate: "s1": {"runtime_json": "s1-runtime.json"} + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + label = LPM_Table['feature ' + str(f)][idx][2] + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature" + str(f)] = {} + Entry["match"]["meta.feature" + str(f)] = [key, mask] + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + Entry["action_params"]["tree"] = int(label) + Entry["priority"] = priority + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + + for idx in LPM_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(LPM_Table['code to vote'][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(LPM_Table['code to vote'][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][f] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + label = LPM_Table['feature ' + str(f)][idx][2] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+ + "."+str(((1<<8)-1)&(key))+"/"+str(32- int(math.log(2**config['p4 config']["width of feature"][f] - mask,2)))+" => "+str(label)+" \n") + + file.write("\n") + + for idx in LPM_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + file.write("table_add SwitchIngress.decision read_lable ") + for f in range(num_features): + file.write(str(LPM_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(LPM_Table['code to vote'][idx]['leaf'])+"\n") + + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write(" int(table['code to vote'][key]['leaf']))\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.dump()" + "\n\n") diff --git a/src/models/DT/Type_4/readme.md b/src/models/DT/Type_4/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_4/readme.md +++ b/src/models/DT/Type_4/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_4/table_generator.py b/src/models/DT/Type_4/table_generator.py index f4da52d..c835fd1 100755 --- a/src/models/DT/Type_4/table_generator.py +++ b/src/models/DT/Type_4/table_generator.py @@ -1,506 +1,506 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -import time -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l == 0: - text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - else: - text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - LPM_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - LPM_Table['feature ' + str(f)] = Table_to_LPM(LPM_Table['feature ' + str(f)], feature_width[f]) - - - # prepare default - collect_votes = [] - LPM_Table['code to vote'] = {} - for idx in Exact_Table['code to vote']: - collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] - code_table_size = 0 - default_label = max(collect_votes , key = collect_votes.count) - for idx in Exact_Table['code to vote']: - if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: - LPM_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] - code_table_size += 1 - Exact_Table['code to vote'] = copy.deepcopy(LPM_Table['code to vote']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) - print('\nLPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - - Planter_config['p4 config']["code table size"] = code_table_size - Planter_config['p4 config']["default lable"] = default_label - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index] # Choose the action with the longest prefix match - - - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] - - if str(code_list) != str(lpm_code_list): - print('error in exact to lpm match', code_list, lpm_code_list) - - - for key in Exact_Table['code to vote']: - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) - match_or_not = True - break - if not match_or_not: - - switch_prediction = Planter_config['p4 config']["default lable"] - - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"]) ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +import time +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l == 0: + text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + else: + text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + LPM_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + LPM_Table['feature ' + str(f)] = Table_to_LPM(LPM_Table['feature ' + str(f)], feature_width[f]) + + + # prepare default + collect_votes = [] + LPM_Table['code to vote'] = {} + for idx in Exact_Table['code to vote']: + collect_votes += [int(Exact_Table['code to vote'][idx]['leaf'])] + code_table_size = 0 + default_label = max(collect_votes , key = collect_votes.count) + for idx in Exact_Table['code to vote']: + if int(Exact_Table['code to vote'][idx]['leaf']) != default_label: + LPM_Table['code to vote'][code_table_size] = Exact_Table['code to vote'][idx] + code_table_size += 1 + Exact_Table['code to vote'] = copy.deepcopy(LPM_Table['code to vote']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) + print('\nLPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + + Planter_config['p4 config']["code table size"] = code_table_size + Planter_config['p4 config']["default lable"] = default_label + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index] # Choose the action with the longest prefix match + + + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] + + if str(code_list) != str(lpm_code_list): + print('error in exact to lpm match', code_list, lpm_code_list) + + + for key in Exact_Table['code to vote']: + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table['code to vote'][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + switch_prediction = int(Exact_Table['code to vote'][key]['leaf']) + match_or_not = True + break + if not match_or_not: + + switch_prediction = Planter_config['p4 config']["default lable"] + + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"]) ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/DT/Type_5/dedicated_p4.py b/src/models/DT/Type_5/dedicated_p4.py index c67eec5..ff4496a 100755 --- a/src/models/DT/Type_5/dedicated_p4.py +++ b/src/models/DT/Type_5/dedicated_p4.py @@ -1,252 +1,252 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_width'] = config_file["width of feature"] - config['feature_table_depth'] = config_file['used columns'] - config['code_tbl_depth'] = config_file['code table size'] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default_lable'] = config_file["default lable"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - headers.write(" bit<32> DstAddr;\n") - headers.write(" bit<32> decision_key;\n") - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - - ingress.write(" meta.decision_key["+str(int(np.sum(config["code_width"])-1))+ ":0] = ") - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for j in range(0, config['num_features']): - if j!=config['num_features']-1: - ingress.write("meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "] ++ ") - else: - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "] ;\n") - ingress.write(" decision.apply();\n") - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( - int(np.array(config['code_width'])[i])) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " meta.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - ingress.write(" action write_default_class() {\n" - " meta.result = " + str(config['default_lable']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - ingress.write(" table decision {\n" - " key = { ") - ingress.write("meta.decision_key:lpm;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_class;\n" - " }\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" - " default_action = write_default_class;\n" - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - label = LPM_Table['feature ' + str(f)][idx][2] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+ - "."+str(((1<<8)-1)&(key))+"/"+str(32- int(math.log(2**config['p4 config']["width of feature"][f] - mask,2)))+" => "+str(label)+" \n") - - file.write("\n") - - for idx in LPM_Table['code to vote']: - key = LPM_Table['code to vote'][idx][1] - mask = LPM_Table['code to vote'][idx][0] - label = LPM_Table['code to vote'][idx][2] - file.write("table_add SwitchIngress.decision read_lable " + str(((1 << 9) - 1) & (key >> 24)) + "." + str( - ((1 << 9) - 1) & (key >> 16)) + "." + str(((1 << 9) - 1) & (key >> 8)) + "." + str(((1 << 9) - 1) & (key)) + - "/"+str(32 - int(math.log(2 ** np.sum(config['p4 config']["width of code"]) - mask, 2))) + " => " + str( label) + " \n") - - - - - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - - commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable" + \ - "(table['code to vote'][key][1], int(32 - int(math.log(2 ** config['total width of code'] - table['code to vote'][key][0], 2))), table['code to vote'][key][2])\n") - - tload.write((config['debug_load_table']) * ('# ') + "Ingress.dump()" + "\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_width'] = config_file["width of feature"] + config['feature_table_depth'] = config_file['used columns'] + config['code_tbl_depth'] = config_file['code table size'] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default_lable'] = config_file["default lable"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + headers.write(" bit<32> DstAddr;\n") + headers.write(" bit<32> decision_key;\n") + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + + ingress.write(" meta.decision_key["+str(int(np.sum(config["code_width"])-1))+ ":0] = ") + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for j in range(0, config['num_features']): + if j!=config['num_features']-1: + ingress.write("meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "] ++ ") + else: + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "] ;\n") + ingress.write(" decision.apply();\n") + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( + int(np.array(config['code_width'])[i])) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " meta.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + ingress.write(" action write_default_class() {\n" + " meta.result = " + str(config['default_lable']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + ingress.write(" table decision {\n" + " key = { ") + ingress.write("meta.decision_key:lpm;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_class;\n" + " }\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" + " default_action = write_default_class;\n" + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + label = LPM_Table['feature ' + str(f)][idx][2] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+ + "."+str(((1<<8)-1)&(key))+"/"+str(32- int(math.log(2**config['p4 config']["width of feature"][f] - mask,2)))+" => "+str(label)+" \n") + + file.write("\n") + + for idx in LPM_Table['code to vote']: + key = LPM_Table['code to vote'][idx][1] + mask = LPM_Table['code to vote'][idx][0] + label = LPM_Table['code to vote'][idx][2] + file.write("table_add SwitchIngress.decision read_lable " + str(((1 << 9) - 1) & (key >> 24)) + "." + str( + ((1 << 9) - 1) & (key >> 16)) + "." + str(((1 << 9) - 1) & (key >> 8)) + "." + str(((1 << 9) - 1) & (key)) + + "/"+str(32 - int(math.log(2 ** np.sum(config['p4 config']["width of code"]) - mask, 2))) + " => " + str( label) + " \n") + + + + + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + + commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable" + \ + "(table['code to vote'][key][1], int(32 - int(math.log(2 ** config['total width of code'] - table['code to vote'][key][0], 2))), table['code to vote'][key][2])\n") + + tload.write((config['debug_load_table']) * ('# ') + "Ingress.dump()" + "\n\n") diff --git a/src/models/DT/Type_5/readme.md b/src/models/DT/Type_5/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_5/readme.md +++ b/src/models/DT/Type_5/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_5/table_generator.py b/src/models/DT/Type_5/table_generator.py index d88341e..5f70677 100755 --- a/src/models/DT/Type_5/table_generator.py +++ b/src/models/DT/Type_5/table_generator.py @@ -1,499 +1,499 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import plotting -import copy -import time -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import * -import re -import json -import math - -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.Muti_Exact_to_LPM import * -from src.functions.json_encoder import * - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Decision Tree - model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - g_table = {} - g_table = generate_table(model, 0, num_features ,g_table, feature_max) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros( num_features) - for i in range(num_features): - code_width_tree_feature[i] = int(np.ceil(math.log( - g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - - Exact_Table = {} - - - Exact_Table['code to vote'] = g_table[0]['code to vote'] - - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] - LPM_Table = copy.deepcopy(Exact_Table) - for f in range(num_features): - print('') - print('Begine transfer: Feature table ' + str(f)) - LPM_Table['feature ' + str(f)] = Table_to_LPM(LPM_Table['feature ' + str(f)], feature_width[f]) - - - print('') - print('Begine transfer: Tree table ') - key_name = [] - for f in range(num_features): - key_name += ['f'+str(f)+' code'] - action_name = 'leaf' - - # prepare default - LPM_Table['code to vote'] = Muti_Exact_to_LPM_Concatination(Exact_Table['code to vote'], code_width_tree_feature, key_name, action_name) - code_table_size = len(LPM_Table['code to vote'].keys()) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) - print('\nLPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "DT" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - - Planter_config['p4 config']["code table size"] = code_table_size - Planter_config['p4 config']["default lable"] = 0 - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["total width of code"] = int(np.sum(code_width_tree_feature)) - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - - - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index] # Choose the action with the longest prefix match - - - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] - - if str(code_list) != str(lpm_code_list): - print('error in exact to lpm match', code_list, lpm_code_list) - - binary_code = '' - for f in range(num_features): - binary_code = binary_code + ten_to_bin(int(code_list[f]), int(Planter_config['p4 config']["width of code"][f])) - decimal_code = int(binary_code, 2) - - LPM_table = LPM_Table['code to vote'] - keys = list(LPM_table.keys()) - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - switch_prediction= action[max_index] # Choose the action with the longest prefix match - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('LPM match entries: ',np.sum(Planter_config['p4 config']["code table size"]) +np.sum(Planter_config['p4 config']["used columns"])) - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import plotting +import copy +import time +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import * +import re +import json +import math + +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.Muti_Exact_to_LPM import * +from src.functions.json_encoder import * + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Decision Tree + model=DecisionTreeClassifier(max_depth=num_depth,max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + g_table = {} + g_table = generate_table(model, 0, num_features ,g_table, feature_max) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros( num_features) + for i in range(num_features): + code_width_tree_feature[i] = int(np.ceil(math.log( + g_table[0]['feature ' + str(i)][np.max(list(g_table[0]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + + Exact_Table = {} + + + Exact_Table['code to vote'] = g_table[0]['code to vote'] + + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][value] = g_table[0]["feature " + str(f)][value] + LPM_Table = copy.deepcopy(Exact_Table) + for f in range(num_features): + print('') + print('Begine transfer: Feature table ' + str(f)) + LPM_Table['feature ' + str(f)] = Table_to_LPM(LPM_Table['feature ' + str(f)], feature_width[f]) + + + print('') + print('Begine transfer: Tree table ') + key_name = [] + for f in range(num_features): + key_name += ['f'+str(f)+' code'] + action_name = 'leaf' + + # prepare default + LPM_Table['code to vote'] = Muti_Exact_to_LPM_Concatination(Exact_Table['code to vote'], code_width_tree_feature, key_name, action_name) + code_table_size = len(LPM_Table['code to vote'].keys()) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) + print('\nLPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "DT" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + + Planter_config['p4 config']["code table size"] = code_table_size + Planter_config['p4 config']["default lable"] = 0 + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["total width of code"] = int(np.sum(code_width_tree_feature)) + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + + + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index] # Choose the action with the longest prefix match + + + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])] + + if str(code_list) != str(lpm_code_list): + print('error in exact to lpm match', code_list, lpm_code_list) + + binary_code = '' + for f in range(num_features): + binary_code = binary_code + ten_to_bin(int(code_list[f]), int(Planter_config['p4 config']["width of code"][f])) + decimal_code = int(binary_code, 2) + + LPM_table = LPM_Table['code to vote'] + keys = list(LPM_table.keys()) + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + switch_prediction= action[max_index] # Choose the action with the longest prefix match + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Decision Tree is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('LPM match entries: ',np.sum(Planter_config['p4 config']["code table size"]) +np.sum(Planter_config['p4 config']["used columns"])) + + + diff --git a/src/models/DT/Type_DM/dedicated_p4.py b/src/models/DT/Type_DM/dedicated_p4.py index ce3b0cb..3b05b2e 100755 --- a/src/models/DT/Type_DM/dedicated_p4.py +++ b/src/models/DT/Type_DM/dedicated_p4.py @@ -1,275 +1,275 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['num_depth'] = config_file["number of depth"] - config['model'] = config_file['model'] - config["decision_table_size"] = config_file["decision table size"] - config['default label'] = config_file["default label"] - config['first_entry'] = config_file["first entry info"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n" - "#define CLASS_NOT_SET 10\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - - headers.write("#define CLASS_NOT_SET 10\n\n") - # headers.write("struct metadata_t {\n") - for t in range(0, config['num_trees']): - headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") - headers.write(" bit<16> node_id;\n" - " bit<16> prevFeature;\n" - " bit<16> isTrue;\n" - " bit<32> DstAddr;\n") - # "}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") - ingress.write("\n") - for t in range(0, config['num_trees']): - ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" - " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" - " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") - for d in range(0, config['num_depth']): - ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") - ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") - ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") - ingress.write(" " + (config['num_depth']) * "} " + "\n\n") - - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" - " bit<32> feature = 0;\n" - # " bit<32> th = threshold ;\n" - " bit<16> f = f_inout ;\n") - for f in range(0, config['num_features']): - ingress.write(" if (f == "+str(f)+") {\n" - " feature = hdr.Planter.feature"+str(f)+";\n" - " }\n") - ingress.write(" bit<32> th = threshold - feature;\n" - # " if (feature <= th){\n" # if (feature <= th){ - " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ - " meta.isTrue = 1;\n" - " }else{\n" - " meta.isTrue = 0;\n" - " }\n" - " meta.prevFeature = f;\n" - " meta.node_id = node_id;\n") - - ingress.write(" }\n\n") - - for t in range(0, config['num_trees']): - ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" - " meta.tree_" + str(t+1) + "_vote = class;\n" - " meta.node_id = node_id; // just for debugging otherwise not needed\n" - " }\n") - - - for t in range(0, config['num_trees']): - for d in range(0, config['num_depth']+1): - ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" - " key = {\n" - " meta.node_id: exact;\n" - " meta.prevFeature: exact;\n" - " meta.isTrue: exact;\n" - " }\n" - " actions = {\n" - " NoAction;\n" - " CheckFeature;\n" - " SetClass"+str(t+1)+";\n" - " }\n" - " size = 1024;\n" - " }\n\n") - - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_command(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Table = json.load(open('Tables/Exact_Table.json', 'r')) - - fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' - - - with open(fname_current, 'a') as file: - for idx in Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") - file.write("=> " + str(Table['decision'][idx]['class']) + "\n") - - - - with open(fname, 'w') as command: - command.write('') - current_file = open(fname_current, 'r') - total_entries = 0 - for line in current_file: - new_file = open(fname, 'a') # Use append mode here - new_file.write(line) - total_entries += 1 - print('Actual exact table entries:', total_entries, '...', end='') - current_file.close() - new_file.close() - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - table = json.load(open('./Tables/Exact_Table.json', 'r')) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - - # Load decision tables - tload.write("print('load table for each nodes')\n") - for idx in table['node table']: - if table['node table'][idx][0] == "CheckFeature": - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] + ', ' + table['node table'][idx][7] - else: - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] - - tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ - '('+key_and_values+')\n') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['num_depth'] = config_file["number of depth"] + config['model'] = config_file['model'] + config["decision_table_size"] = config_file["decision table size"] + config['default label'] = config_file["default label"] + config['first_entry'] = config_file["first entry info"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n" + "#define CLASS_NOT_SET 10\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + + headers.write("#define CLASS_NOT_SET 10\n\n") + # headers.write("struct metadata_t {\n") + for t in range(0, config['num_trees']): + headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") + headers.write(" bit<16> node_id;\n" + " bit<16> prevFeature;\n" + " bit<16> isTrue;\n" + " bit<32> DstAddr;\n") + # "}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") + ingress.write("\n") + for t in range(0, config['num_trees']): + ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" + " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" + " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") + for d in range(0, config['num_depth']): + ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") + ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") + ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") + ingress.write(" " + (config['num_depth']) * "} " + "\n\n") + + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" + " bit<32> feature = 0;\n" + # " bit<32> th = threshold ;\n" + " bit<16> f = f_inout ;\n") + for f in range(0, config['num_features']): + ingress.write(" if (f == "+str(f)+") {\n" + " feature = hdr.Planter.feature"+str(f)+";\n" + " }\n") + ingress.write(" bit<32> th = threshold - feature;\n" + # " if (feature <= th){\n" # if (feature <= th){ + " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ + " meta.isTrue = 1;\n" + " }else{\n" + " meta.isTrue = 0;\n" + " }\n" + " meta.prevFeature = f;\n" + " meta.node_id = node_id;\n") + + ingress.write(" }\n\n") + + for t in range(0, config['num_trees']): + ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" + " meta.tree_" + str(t+1) + "_vote = class;\n" + " meta.node_id = node_id; // just for debugging otherwise not needed\n" + " }\n") + + + for t in range(0, config['num_trees']): + for d in range(0, config['num_depth']+1): + ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" + " key = {\n" + " meta.node_id: exact;\n" + " meta.prevFeature: exact;\n" + " meta.isTrue: exact;\n" + " }\n" + " actions = {\n" + " NoAction;\n" + " CheckFeature;\n" + " SetClass"+str(t+1)+";\n" + " }\n" + " size = 1024;\n" + " }\n\n") + + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_command(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Table = json.load(open('Tables/Exact_Table.json', 'r')) + + fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' + + + with open(fname_current, 'a') as file: + for idx in Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") + file.write("=> " + str(Table['decision'][idx]['class']) + "\n") + + + + with open(fname, 'w') as command: + command.write('') + current_file = open(fname_current, 'r') + total_entries = 0 + for line in current_file: + new_file = open(fname, 'a') # Use append mode here + new_file.write(line) + total_entries += 1 + print('Actual exact table entries:', total_entries, '...', end='') + current_file.close() + new_file.close() + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + table = json.load(open('./Tables/Exact_Table.json', 'r')) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + + # Load decision tables + tload.write("print('load table for each nodes')\n") + for idx in table['node table']: + if table['node table'][idx][0] == "CheckFeature": + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + ', ' + table['node table'][idx][7] + else: + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + + tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ + '('+key_and_values+')\n') diff --git a/src/models/DT/Type_DM/readme.md b/src/models/DT/Type_DM/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_DM/readme.md +++ b/src/models/DT/Type_DM/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_DM/table_generator.py b/src/models/DT/Type_DM/table_generator.py index 8e729d2..47c2624 100755 --- a/src/models/DT/Type_DM/table_generator.py +++ b/src/models/DT/Type_DM/table_generator.py @@ -1,398 +1,398 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh -# If you want to use this type of model, -# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -import time -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -from sklearn import tree -from sklearn.tree import export_text -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier - -# i_tree = 0 -# -# global_id = 0 - - -def export_p4(decision_tree, fname): - tree_ = decision_tree.tree_ - class_names = decision_tree.classes_ - right_child_fmt = "{} {} <= {}\n" - left_child_fmt = "{} {} > {}\n" - truncation_fmt = "{} {}\n" - feature_names_ = ["{}".format(i) for i in tree_.feature] - export_text.report = "" - max_depth = 10 - spacing = 3 - decimals = 2 - show_weights = False - - if isinstance(decision_tree, DecisionTreeClassifier): - value_fmt = "{}{} weights: {}\n" - if not show_weights: - value_fmt = "{}{}{}\n" - else: - value_fmt = "{}{} value: {}\n" - - def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): - global global_id - global i_tree - global Exact_Table - - current_id = global_id - - val = '' - is_classification = isinstance(decision_tree, - DecisionTreeClassifier) - if show_weights or not is_classification: - val = ["{1:.{0}f}, ".format(decimals, v) for v in value] - val = '[' + ''.join(val)[:-2] + ']' - if is_classification: - val += ' class: ' + str(class_name) - export_text.report += value_fmt.format(indent, '', val) - # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, - # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ - " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ - str(int(float(class_name))) +"\n") - - Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), - "level_" + str(i_tree) + "_" + str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), - str(int(float(class_name)))] - Exact_Table['node table counter'] += 1 - - - - - - def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): - indent = ("|" + (" " * spacing)) * depth - indent = indent[:-spacing] + "-" * spacing - global global_id - global i_tree - global Exact_Table - - global_id = global_id + 1 - current_id = global_id - - value = None - if tree_.n_outputs == 1: - value = tree_.value[node][0] - else: - value = tree_.value[node].T[0] - class_name = np.argmax(value) - - if (tree_.n_classes[0] != 1 and - tree_.n_outputs == 1): - class_name = class_names[class_name] - - if depth <= max_depth + 1: - info_fmt = "" - info_fmt_left = info_fmt - info_fmt_right = info_fmt - - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_names_[node] - threshold = tree_.threshold[node] - threshold = "{1:.{0}f}".format(decimals, threshold) - export_text.report += right_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_left - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ - str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + - " " + str(name) + " " + str(int(float(threshold)))+"\n") - global first_entry - global entry_info - global Exact_Table - - Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), str(name) , - str(int(float(threshold)))] - Exact_Table['node table counter'] += 1 - - if first_entry: - first_entry = False - entry_info += [[previous_id, prevfeature, result]] - - print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) - - export_text.report += left_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_right - # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) - - print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) - else: # leaf - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - subtree_depth = _compute_depth(tree_, node) - if subtree_depth == 1: - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - trunc_report = 'truncated branch of depth %d' % subtree_depth - export_text.report += truncation_fmt.format(indent, - trunc_report) - - print_tree_recurse(0, 1, 0, 1, global_id, fname) - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = 1 - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Decision Tree - model = DecisionTreeClassifier(max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' - # refresh the command (Table) file - with open(fname, 'w') as command: - command.write('') - - global global_id - global i_tree - global first_entry - global entry_info - global Exact_Table - - i_tree = 0 - global_id = 0 - entry_info = [] - Exact_Table = {} - Exact_Table['node table'] = {} - Exact_Table['node table counter'] = 0 - - estimator = model - idx = 0 - with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: - f.write('') - with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: - get_lineage(estimator, feature_names, f) - first_entry = True - i_tree = i_tree + 1 - export_p4(estimator, fname) - # print(entry_info) - - g_table = {} - print("Generating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - g_table['decision'] = g_table['votes to class'] - - collect_class = [] - for idx in g_table['decision']: - collect_class += [g_table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Exact_Table['decision'] = {} - for idx in g_table['decision']: - if g_table['decision'][idx]['class'] != default_class: - Exact_Table['decision'][code_table_size] = g_table['decision'][idx] - code_table_size += 1 - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Depth_Based_Table.txt and Exact_Table.json is generated') - - - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']["number of depth"] = num_depth - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["first entry info"] = entry_info - Planter_config['p4 config']["default label"] = default_class - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - print('The python simulation test does not support this model, please do the following emulation test on the software switch.') - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of depth"]) - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh +# If you want to use this type of model, +# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +import time +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +from sklearn import tree +from sklearn.tree import export_text +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier + +# i_tree = 0 +# +# global_id = 0 + + +def export_p4(decision_tree, fname): + tree_ = decision_tree.tree_ + class_names = decision_tree.classes_ + right_child_fmt = "{} {} <= {}\n" + left_child_fmt = "{} {} > {}\n" + truncation_fmt = "{} {}\n" + feature_names_ = ["{}".format(i) for i in tree_.feature] + export_text.report = "" + max_depth = 10 + spacing = 3 + decimals = 2 + show_weights = False + + if isinstance(decision_tree, DecisionTreeClassifier): + value_fmt = "{}{} weights: {}\n" + if not show_weights: + value_fmt = "{}{}{}\n" + else: + value_fmt = "{}{} value: {}\n" + + def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): + global global_id + global i_tree + global Exact_Table + + current_id = global_id + + val = '' + is_classification = isinstance(decision_tree, + DecisionTreeClassifier) + if show_weights or not is_classification: + val = ["{1:.{0}f}, ".format(decimals, v) for v in value] + val = '[' + ''.join(val)[:-2] + ']' + if is_classification: + val += ' class: ' + str(class_name) + export_text.report += value_fmt.format(indent, '', val) + # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, + # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ + " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ + str(int(float(class_name))) +"\n") + + Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), + "level_" + str(i_tree) + "_" + str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), + str(int(float(class_name)))] + Exact_Table['node table counter'] += 1 + + + + + + def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): + indent = ("|" + (" " * spacing)) * depth + indent = indent[:-spacing] + "-" * spacing + global global_id + global i_tree + global Exact_Table + + global_id = global_id + 1 + current_id = global_id + + value = None + if tree_.n_outputs == 1: + value = tree_.value[node][0] + else: + value = tree_.value[node].T[0] + class_name = np.argmax(value) + + if (tree_.n_classes[0] != 1 and + tree_.n_outputs == 1): + class_name = class_names[class_name] + + if depth <= max_depth + 1: + info_fmt = "" + info_fmt_left = info_fmt + info_fmt_right = info_fmt + + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_names_[node] + threshold = tree_.threshold[node] + threshold = "{1:.{0}f}".format(decimals, threshold) + export_text.report += right_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_left + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ + str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + + " " + str(name) + " " + str(int(float(threshold)))+"\n") + global first_entry + global entry_info + global Exact_Table + + Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), str(name) , + str(int(float(threshold)))] + Exact_Table['node table counter'] += 1 + + if first_entry: + first_entry = False + entry_info += [[previous_id, prevfeature, result]] + + print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) + + export_text.report += left_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_right + # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) + + print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) + else: # leaf + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + subtree_depth = _compute_depth(tree_, node) + if subtree_depth == 1: + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + trunc_report = 'truncated branch of depth %d' % subtree_depth + export_text.report += truncation_fmt.format(indent, + trunc_report) + + print_tree_recurse(0, 1, 0, 1, global_id, fname) + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = 1 + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Decision Tree + model = DecisionTreeClassifier(max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' + # refresh the command (Table) file + with open(fname, 'w') as command: + command.write('') + + global global_id + global i_tree + global first_entry + global entry_info + global Exact_Table + + i_tree = 0 + global_id = 0 + entry_info = [] + Exact_Table = {} + Exact_Table['node table'] = {} + Exact_Table['node table counter'] = 0 + + estimator = model + idx = 0 + with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: + f.write('') + with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: + get_lineage(estimator, feature_names, f) + first_entry = True + i_tree = i_tree + 1 + export_p4(estimator, fname) + # print(entry_info) + + g_table = {} + print("Generating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + g_table['decision'] = g_table['votes to class'] + + collect_class = [] + for idx in g_table['decision']: + collect_class += [g_table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Exact_Table['decision'] = {} + for idx in g_table['decision']: + if g_table['decision'][idx]['class'] != default_class: + Exact_Table['decision'][code_table_size] = g_table['decision'][idx] + code_table_size += 1 + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Depth_Based_Table.txt and Exact_Table.json is generated') + + + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']["number of depth"] = num_depth + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["first entry info"] = entry_info + Planter_config['p4 config']["default label"] = default_class + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + print('The python simulation test does not support this model, please do the following emulation test on the software switch.') + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of depth"]) + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/DT/Type_EB/dedicated_p4.py b/src/models/DT/Type_EB/dedicated_p4.py index 215602a..27d1990 100755 --- a/src/models/DT/Type_EB/dedicated_p4.py +++ b/src/models/DT/Type_EB/dedicated_p4.py @@ -1,299 +1,299 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - # config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - # config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default_lable'] = config_file["default lable"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - - headers.write(" bit<32> DstAddr;\n") - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( - int(np.array(config['code_width'])[i])) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " hdr.Planter.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - - ingress.write(" action write_default_class() {\n" - " hdr.Planter.result = " + str(config['default_lable']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - - ingress.write(" table decision {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_class;\n" - " }\n") - ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" - " default_action = write_default_class;\n" - " }\n\n") - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables(Planter_config): - # change this in topology.json to activate: "s1": {"runtime_json": "s1-runtime.json"} - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - label = Ternary_Table['feature ' + str(f)][idx][2] - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature" + str(f)] = {} - Entry["match"]["hdr.Planter.feature"+str(f)] = [key, mask] - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - Entry["action_params"]["tree"] = int(label) - Entry["priority"] = priority - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - - for idx in Ternary_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Ternary_Table['code to vote'][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Ternary_Table['code to vote'][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][f] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - label = Ternary_Table['feature ' + str(f)][idx][2] - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - for idx in Ternary_Table['code to vote']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - file.write("table_add SwitchIngress.decision read_lable ") - for f in range(num_features): - file.write(str(Ternary_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") - file.write("=> "+str(Ternary_Table['code to vote'][idx]['leaf'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(table['feature " + str(i) + "'][key][1], table['feature " + str(i) + "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - - tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") - tload.write("for key in table['code to vote']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write(" int(table['code to vote'][key]['leaf']))\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") - tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + # config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + # config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default_lable'] = config_file["default lable"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(config['code_width'][i])) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + + headers.write(" bit<32> DstAddr;\n") + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(np.array(config['code_width'])[i])) + "> meta_code, bit<" + str( + int(np.array(config['code_width'])[i])) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " hdr.Planter.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + + ingress.write(" action write_default_class() {\n" + " hdr.Planter.result = " + str(config['default_lable']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + + ingress.write(" table decision {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_class;\n" + " }\n") + ingress.write(" size = " + str(config['code_tbl_depth']) + ";\n" + " default_action = write_default_class;\n" + " }\n\n") + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables(Planter_config): + # change this in topology.json to activate: "s1": {"runtime_json": "s1-runtime.json"} + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + label = Ternary_Table['feature ' + str(f)][idx][2] + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature" + str(f)] = {} + Entry["match"]["hdr.Planter.feature"+str(f)] = [key, mask] + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + Entry["action_params"]["tree"] = int(label) + Entry["priority"] = priority + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + + for idx in Ternary_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Ternary_Table['code to vote'][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Ternary_Table['code to vote'][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][f] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + label = Ternary_Table['feature ' + str(f)][idx][2] + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + for idx in Ternary_Table['code to vote']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + file.write("table_add SwitchIngress.decision read_lable ") + for f in range(num_features): + file.write(str(Ternary_Table['code to vote'][idx]['f' + str(f) + ' code'])+" ") + file.write("=> "+str(Ternary_Table['code to vote'][idx]['leaf'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + commend_file = work_root+"/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str(i) + "'][key][2]), int(config['width of code'][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(table['feature " + str(i) + "'][key][1], table['feature " + str(i) + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + + tload.write("print('load tree (code/code to vote) table with',len(table['code to vote'].keys()),'entries')\n") + tload.write("for key in table['code to vote']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write(" int(table['code to vote'][key]['leaf']))\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['code to vote'][key]['f" + str(f) + " code'], ") + tload.write("int(table['code to vote'][key]['leaf'])), end='')\n\n") + diff --git a/src/models/DT/Type_EB/readme.md b/src/models/DT/Type_EB/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_EB/readme.md +++ b/src/models/DT/Type_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_depth_based_bmv2_only/dedicated_p4.py b/src/models/DT/Type_depth_based_bmv2_only/dedicated_p4.py index 66a8cd8..0e54fd6 100755 --- a/src/models/DT/Type_depth_based_bmv2_only/dedicated_p4.py +++ b/src/models/DT/Type_depth_based_bmv2_only/dedicated_p4.py @@ -1,275 +1,275 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['num_depth'] = config_file["number of depth"] - config['model'] = config_file['model'] - config["decision_table_size"] = config_file["decision table size"] - config['default label'] = config_file["default label"] - config['first_entry'] = config_file["first entry info"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n" - "#define CLASS_NOT_SET 10\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - - headers.write("#define CLASS_NOT_SET 10\n\n") - # headers.write("struct metadata_t {\n") - for t in range(0, config['num_trees']): - headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") - headers.write(" bit<16> node_id;\n" - " bit<16> prevFeature;\n" - " bit<16> isTrue;\n" - " bit<32> DstAddr;\n") - # "}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") - ingress.write("\n") - for t in range(0, config['num_trees']): - ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" - " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" - " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") - for d in range(0, config['num_depth']): - ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") - ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") - ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") - ingress.write(" " + (config['num_depth']) * "} " + "\n\n") - - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" - " bit<32> feature = 0;\n" - # " bit<32> th = threshold ;\n" - " bit<16> f = f_inout ;\n") - for f in range(0, config['num_features']): - ingress.write(" if (f == "+str(f)+") {\n" - " feature = meta.feature"+str(f)+";\n" - " }\n") - ingress.write(" bit<32> th = threshold - feature;\n" - # " if (feature <= th){\n" # if (feature <= th){ - " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ - " meta.isTrue = 1;\n" - " }else{\n" - " meta.isTrue = 0;\n" - " }\n" - " meta.prevFeature = f;\n" - " meta.node_id = node_id;\n") - - ingress.write(" }\n\n") - - for t in range(0, config['num_trees']): - ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" - " meta.tree_" + str(t+1) + "_vote = class;\n" - " meta.node_id = node_id; // just for debugging otherwise not needed\n" - " }\n") - - - for t in range(0, config['num_trees']): - for d in range(0, config['num_depth']+1): - ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" - " key = {\n" - " meta.node_id: exact;\n" - " meta.prevFeature: exact;\n" - " meta.isTrue: exact;\n" - " }\n" - " actions = {\n" - " NoAction;\n" - " CheckFeature;\n" - " SetClass"+str(t+1)+";\n" - " }\n" - " size = 1024;\n" - " }\n\n") - - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_command(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Table = json.load(open('Tables/Exact_Table.json', 'r')) - - fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' - - - with open(fname_current, 'a') as file: - for idx in Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") - file.write("=> " + str(Table['decision'][idx]['class']) + "\n") - - - - with open(fname, 'w') as command: - command.write('') - current_file = open(fname_current, 'r') - total_entries = 0 - for line in current_file: - new_file = open(fname, 'a') # Use append mode here - new_file.write(line) - total_entries += 1 - print('Actual exact table entries:', total_entries, '...', end='') - current_file.close() - new_file.close() - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - table = json.load(open('./Tables/Exact_Table.json', 'r')) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - - # Load decision tables - tload.write("print('load table for each nodes')\n") - for idx in table['node table']: - if table['node table'][idx][0] == "CheckFeature": - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] + ', ' + table['node table'][idx][7] - else: - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] - - tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ - '('+key_and_values+')\n') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['num_depth'] = config_file["number of depth"] + config['model'] = config_file['model'] + config["decision_table_size"] = config_file["decision table size"] + config['default label'] = config_file["default label"] + config['first_entry'] = config_file["first entry info"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n" + "#define CLASS_NOT_SET 10\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + + headers.write("#define CLASS_NOT_SET 10\n\n") + # headers.write("struct metadata_t {\n") + for t in range(0, config['num_trees']): + headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") + headers.write(" bit<16> node_id;\n" + " bit<16> prevFeature;\n" + " bit<16> isTrue;\n" + " bit<32> DstAddr;\n") + # "}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") + ingress.write("\n") + for t in range(0, config['num_trees']): + ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" + " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" + " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") + for d in range(0, config['num_depth']): + ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") + ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") + ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") + ingress.write(" " + (config['num_depth']) * "} " + "\n\n") + + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" + " bit<32> feature = 0;\n" + # " bit<32> th = threshold ;\n" + " bit<16> f = f_inout ;\n") + for f in range(0, config['num_features']): + ingress.write(" if (f == "+str(f)+") {\n" + " feature = meta.feature"+str(f)+";\n" + " }\n") + ingress.write(" bit<32> th = threshold - feature;\n" + # " if (feature <= th){\n" # if (feature <= th){ + " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ + " meta.isTrue = 1;\n" + " }else{\n" + " meta.isTrue = 0;\n" + " }\n" + " meta.prevFeature = f;\n" + " meta.node_id = node_id;\n") + + ingress.write(" }\n\n") + + for t in range(0, config['num_trees']): + ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" + " meta.tree_" + str(t+1) + "_vote = class;\n" + " meta.node_id = node_id; // just for debugging otherwise not needed\n" + " }\n") + + + for t in range(0, config['num_trees']): + for d in range(0, config['num_depth']+1): + ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" + " key = {\n" + " meta.node_id: exact;\n" + " meta.prevFeature: exact;\n" + " meta.isTrue: exact;\n" + " }\n" + " actions = {\n" + " NoAction;\n" + " CheckFeature;\n" + " SetClass"+str(t+1)+";\n" + " }\n" + " size = 1024;\n" + " }\n\n") + + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_command(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Table = json.load(open('Tables/Exact_Table.json', 'r')) + + fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' + + + with open(fname_current, 'a') as file: + for idx in Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") + file.write("=> " + str(Table['decision'][idx]['class']) + "\n") + + + + with open(fname, 'w') as command: + command.write('') + current_file = open(fname_current, 'r') + total_entries = 0 + for line in current_file: + new_file = open(fname, 'a') # Use append mode here + new_file.write(line) + total_entries += 1 + print('Actual exact table entries:', total_entries, '...', end='') + current_file.close() + new_file.close() + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + table = json.load(open('./Tables/Exact_Table.json', 'r')) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + + # Load decision tables + tload.write("print('load table for each nodes')\n") + for idx in table['node table']: + if table['node table'][idx][0] == "CheckFeature": + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + ', ' + table['node table'][idx][7] + else: + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + + tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ + '('+key_and_values+')\n') diff --git a/src/models/DT/Type_depth_based_bmv2_only/readme.md b/src/models/DT/Type_depth_based_bmv2_only/readme.md index 8b1ff67..f57f837 100644 --- a/src/models/DT/Type_depth_based_bmv2_only/readme.md +++ b/src/models/DT/Type_depth_based_bmv2_only/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for DT. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/DT/Type_depth_based_bmv2_only/table_generator.py b/src/models/DT/Type_depth_based_bmv2_only/table_generator.py index f25ffb4..017c68e 100755 --- a/src/models/DT/Type_depth_based_bmv2_only/table_generator.py +++ b/src/models/DT/Type_depth_based_bmv2_only/table_generator.py @@ -1,393 +1,393 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh -# If you want to use this type of model, -# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -import time -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -from sklearn import tree -from sklearn.tree import export_text -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier - - - - -def export_p4(decision_tree, fname): - tree_ = decision_tree.tree_ - class_names = decision_tree.classes_ - right_child_fmt = "{} {} <= {}\n" - left_child_fmt = "{} {} > {}\n" - truncation_fmt = "{} {}\n" - feature_names_ = ["{}".format(i) for i in tree_.feature] - export_text.report = "" - max_depth = 10 - spacing = 3 - decimals = 2 - show_weights = False - - if isinstance(decision_tree, DecisionTreeClassifier): - value_fmt = "{}{} weights: {}\n" - if not show_weights: - value_fmt = "{}{}{}\n" - else: - value_fmt = "{}{} value: {}\n" - - def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): - global global_id - global i_tree - global Exact_Table - - current_id = global_id - - val = '' - is_classification = isinstance(decision_tree, - DecisionTreeClassifier) - if show_weights or not is_classification: - val = ["{1:.{0}f}, ".format(decimals, v) for v in value] - val = '[' + ''.join(val)[:-2] + ']' - if is_classification: - val += ' class: ' + str(class_name) - export_text.report += value_fmt.format(indent, '', val) - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ - " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ - str(int(float(class_name))) +"\n") - - Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), - "level_" + str(i_tree) + "_" + str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), - str(int(float(class_name)))] - Exact_Table['node table counter'] += 1 - - - - - - def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): - indent = ("|" + (" " * spacing)) * depth - indent = indent[:-spacing] + "-" * spacing - global global_id - global i_tree - global Exact_Table - - global_id = global_id + 1 - current_id = global_id - - value = None - if tree_.n_outputs == 1: - value = tree_.value[node][0] - else: - value = tree_.value[node].T[0] - class_name = np.argmax(value) - - if (tree_.n_classes[0] != 1 and - tree_.n_outputs == 1): - class_name = class_names[class_name] - - if depth <= max_depth + 1: - info_fmt = "" - info_fmt_left = info_fmt - info_fmt_right = info_fmt - - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_names_[node] - threshold = tree_.threshold[node] - threshold = "{1:.{0}f}".format(decimals, threshold) - export_text.report += right_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_left - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ - str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + - " " + str(name) + " " + str(int(float(threshold)))+"\n") - global first_entry - global entry_info - global Exact_Table - - Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), str(name) , - str(int(float(threshold)))] - Exact_Table['node table counter'] += 1 - - if first_entry: - first_entry = False - entry_info += [[previous_id, prevfeature, result]] - - print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) - - export_text.report += left_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_right - - - print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) - else: # leaf - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - subtree_depth = _compute_depth(tree_, node) - if subtree_depth == 1: - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - trunc_report = 'truncated branch of depth %d' % subtree_depth - export_text.report += truncation_fmt.format(indent, - trunc_report) - - print_tree_recurse(0, 1, 0, 1, global_id, fname) - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = 1 - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Decision Tree - model = DecisionTreeClassifier(max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - model.fit(train_X, train_y) - sklearn_y_predict = model.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' - # refresh the command (Table) file - with open(fname, 'w') as command: - command.write('') - - global global_id - global i_tree - global first_entry - global entry_info - global Exact_Table - - i_tree = 0 - global_id = 0 - entry_info = [] - Exact_Table = {} - Exact_Table['node table'] = {} - Exact_Table['node table counter'] = 0 - - estimator = model - idx = 0 - with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: - f.write('') - with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: - get_lineage(estimator, feature_names, f) - first_entry = True - i_tree = i_tree + 1 - export_p4(estimator, fname) - - - g_table = {} - print("Generating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - g_table['decision'] = g_table['votes to class'] - - collect_class = [] - for idx in g_table['decision']: - collect_class += [g_table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Exact_Table['decision'] = {} - for idx in g_table['decision']: - if g_table['decision'][idx]['class'] != default_class: - Exact_Table['decision'][code_table_size] = g_table['decision'][idx] - code_table_size += 1 - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Depth_Based_Table.txt and Exact_Table.json is generated') - - - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']["number of depth"] = num_depth - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["first entry info"] = entry_info - Planter_config['p4 config']["default label"] = default_class - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - print('The python simulation test does not support this model, please do the following emulation test on the software switch.') - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of depth"]) - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh +# If you want to use this type of model, +# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +import time +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +from sklearn import tree +from sklearn.tree import export_text +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier + + + + +def export_p4(decision_tree, fname): + tree_ = decision_tree.tree_ + class_names = decision_tree.classes_ + right_child_fmt = "{} {} <= {}\n" + left_child_fmt = "{} {} > {}\n" + truncation_fmt = "{} {}\n" + feature_names_ = ["{}".format(i) for i in tree_.feature] + export_text.report = "" + max_depth = 10 + spacing = 3 + decimals = 2 + show_weights = False + + if isinstance(decision_tree, DecisionTreeClassifier): + value_fmt = "{}{} weights: {}\n" + if not show_weights: + value_fmt = "{}{}{}\n" + else: + value_fmt = "{}{} value: {}\n" + + def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): + global global_id + global i_tree + global Exact_Table + + current_id = global_id + + val = '' + is_classification = isinstance(decision_tree, + DecisionTreeClassifier) + if show_weights or not is_classification: + val = ["{1:.{0}f}, ".format(decimals, v) for v in value] + val = '[' + ''.join(val)[:-2] + ']' + if is_classification: + val += ' class: ' + str(class_name) + export_text.report += value_fmt.format(indent, '', val) + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ + " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ + str(int(float(class_name))) +"\n") + + Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), + "level_" + str(i_tree) + "_" + str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), + str(int(float(class_name)))] + Exact_Table['node table counter'] += 1 + + + + + + def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): + indent = ("|" + (" " * spacing)) * depth + indent = indent[:-spacing] + "-" * spacing + global global_id + global i_tree + global Exact_Table + + global_id = global_id + 1 + current_id = global_id + + value = None + if tree_.n_outputs == 1: + value = tree_.value[node][0] + else: + value = tree_.value[node].T[0] + class_name = np.argmax(value) + + if (tree_.n_classes[0] != 1 and + tree_.n_outputs == 1): + class_name = class_names[class_name] + + if depth <= max_depth + 1: + info_fmt = "" + info_fmt_left = info_fmt + info_fmt_right = info_fmt + + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_names_[node] + threshold = tree_.threshold[node] + threshold = "{1:.{0}f}".format(decimals, threshold) + export_text.report += right_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_left + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ + str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + + " " + str(name) + " " + str(int(float(threshold)))+"\n") + global first_entry + global entry_info + global Exact_Table + + Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), str(name) , + str(int(float(threshold)))] + Exact_Table['node table counter'] += 1 + + if first_entry: + first_entry = False + entry_info += [[previous_id, prevfeature, result]] + + print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) + + export_text.report += left_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_right + + + print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) + else: # leaf + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + subtree_depth = _compute_depth(tree_, node) + if subtree_depth == 1: + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + trunc_report = 'truncated branch of depth %d' % subtree_depth + export_text.report += truncation_fmt.format(indent, + trunc_report) + + print_tree_recurse(0, 1, 0, 1, global_id, fname) + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = 1 + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Decision Tree + model = DecisionTreeClassifier(max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + model.fit(train_X, train_y) + sklearn_y_predict = model.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' + # refresh the command (Table) file + with open(fname, 'w') as command: + command.write('') + + global global_id + global i_tree + global first_entry + global entry_info + global Exact_Table + + i_tree = 0 + global_id = 0 + entry_info = [] + Exact_Table = {} + Exact_Table['node table'] = {} + Exact_Table['node table counter'] = 0 + + estimator = model + idx = 0 + with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: + f.write('') + with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: + get_lineage(estimator, feature_names, f) + first_entry = True + i_tree = i_tree + 1 + export_p4(estimator, fname) + + + g_table = {} + print("Generating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + g_table['decision'] = g_table['votes to class'] + + collect_class = [] + for idx in g_table['decision']: + collect_class += [g_table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Exact_Table['decision'] = {} + for idx in g_table['decision']: + if g_table['decision'][idx]['class'] != default_class: + Exact_Table['decision'][code_table_size] = g_table['decision'][idx] + code_table_size += 1 + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Depth_Based_Table.txt and Exact_Table.json is generated') + + + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']["number of depth"] = num_depth + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["first entry info"] = entry_info + Planter_config['p4 config']["default label"] = default_class + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + print('The python simulation test does not support this model, please do the following emulation test on the software switch.') + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of depth"]) + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/DT/readme.md b/src/models/DT/readme.md index 224f46c..bdca0eb 100644 --- a/src/models/DT/readme.md +++ b/src/models/DT/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported DT. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported DT. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/IF/Type_1/dedicated_p4.py b/src/models/IF/Type_1/dedicated_p4.py index 2ead009..19a0cc3 100755 --- a/src/models/IF/Type_1/dedicated_p4.py +++ b/src/models/IF/Type_1/dedicated_p4.py @@ -1,317 +1,317 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + - "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + + "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - + int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/IF/Type_1/readme.md b/src/models/IF/Type_1/readme.md index 6dff52f..a9901f8 100644 --- a/src/models/IF/Type_1/readme.md +++ b/src/models/IF/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/IF/Type_1/table_generator.py b/src/models/IF/Type_1/table_generator.py index a9080c2..617ba19 100755 --- a/src/models/IF/Type_1/table_generator.py +++ b/src/models/IF/Type_1/table_generator.py @@ -1,612 +1,612 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import matplotlib.pyplot as plt -from sklearn.ensemble import IsolationForest -import math -import json -import copy -from sklearn.metrics import * -import re -from src.functions.json_encoder import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -from src.functions.Muti_Exact_to_LPM import * - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - n_node_samples = tree.tree_.n_node_samples - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - # print(node) - ind = n_node_samples[node] - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - num_nodes = 0 - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - num_nodes += 1 - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append([num_nodes, int(num[i][a - 1])]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - # print(classfication) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - leaf_info['tree '+str(tree_index)]= np.unique(classfication, axis=0) - return g_table, leaf_info - -def _average_path_length(n_samples_leaf): - """ - The average path length in a n_samples iTree, which is equal to - the average path length of an unsuccessful BST search since the - latter has the same structure as an isolation tree. - Parameters - ---------- - n_samples_leaf : array-like of shape (n_samples,) - The number of training samples in each test sample leaf, for - each estimators. - - Returns - ------- - average_path_length : ndarray of shape (n_samples,) - """ - - # n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False) - - n_samples_leaf_shape = n_samples_leaf.shape - n_samples_leaf = n_samples_leaf.reshape((1, -1)) - average_path_length = np.zeros(n_samples_leaf.shape) - - mask_1 = n_samples_leaf <= 1 - mask_2 = n_samples_leaf == 2 - not_mask = ~np.logical_or(mask_1, mask_2) - - average_path_length[mask_1] = 0. - average_path_length[mask_2] = 1. - average_path_length[not_mask] = ( - 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma) - - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask] - ) - - return average_path_length.reshape(n_samples_leaf_shape) - -def complex_list_idx(target_list, component): - for i, x in enumerate(target_list): - if np.all(x==component): - # print(i) - return i - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): - if tree_num == num_trees: - vote = 0 - for t in range(num_trees): - vote += (leaf_info["tree "+str(t)][vote_list[t]][0] + _average_path_length(leaf_info["tree "+str(t)][vote_list[t]][1])) - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] - # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - if vote >= path_len_threshold*num_trees: - g_table['votes to class'][num]['class'] = 0 - else: - g_table['votes to class'][num]['class'] = 1 - - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) - tree_num -= 1 - return g_table, num - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - num_features = Planter_config['data config']['number of features'] - num_samples = Planter_config['model config']['number of samples'] - num_classes = Planter_config['model config']['number of classes'] - # num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.5, 2)) - print("The threshold of path length is %.2f" % path_len_threshold) - # max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - - - rng = np.random.RandomState(42) - - - - # fit the model - clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) - clf.fit(train_X) - - clf.decision_function(train_X) - - y_pred_test = clf.predict(test_X) - sklearn_y_predict = copy.deepcopy(y_pred_test) - - for i in range(len(y_pred_test)): - if y_pred_test[i] == -1: - sklearn_y_predict[i] = 1 - if y_pred_test[i] == 1: - sklearn_y_predict[i] = 0 - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - leaf_info = {} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx, estimator in enumerate(clf.estimators_): - g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) - - - g_table['votes to class'] = {} - print("\nGenerating vote to class table...", end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) - print('Done') - - for t in range(num_trees): - leaf_info['tree ' + str(t)] = list(leaf_info['tree ' + str(t)]) - for i, x in enumerate(leaf_info['tree ' + str(t)]): - leaf_info['tree ' + str(t)][i] = str(list(x)) - - for t in range(num_trees): - for k in g_table[t]['code to vote'].keys(): - g_table[t]['code to vote'][k]['leaf'] = leaf_info['tree ' + str(t)].index(str(list(g_table[t]['code to vote'][k]['leaf']))) - - for k in g_table['votes to class'].keys(): - for t in range(num_trees): - g_table['votes to class'][k]['t'+str(t)+' vote'] = leaf_info['tree ' + str(t)].index(str(list(g_table['votes to class'][k]['t'+str(t)+' vote']))) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros((num_trees, num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = int(np.ceil(math.log( - g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature ' + str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i != 0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - print('Done') - - table_name = 'LPM_Table.json' - json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) - print('LPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "IF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - -def test_tables(sklearn_test_y, test_X, test_y): - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list) != str(lpm_code_list): - print('error in exact to ternary match', code_list, lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), - f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), - end=" ") - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +import math +import json +import copy +from sklearn.metrics import * +import re +from src.functions.json_encoder import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +from src.functions.Muti_Exact_to_LPM import * + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + n_node_samples = tree.tree_.n_node_samples + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + # print(node) + ind = n_node_samples[node] + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + num_nodes = 0 + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + num_nodes += 1 + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append([num_nodes, int(num[i][a - 1])]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + # print(classfication) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + leaf_info['tree '+str(tree_index)]= np.unique(classfication, axis=0) + return g_table, leaf_info + +def _average_path_length(n_samples_leaf): + """ + The average path length in a n_samples iTree, which is equal to + the average path length of an unsuccessful BST search since the + latter has the same structure as an isolation tree. + Parameters + ---------- + n_samples_leaf : array-like of shape (n_samples,) + The number of training samples in each test sample leaf, for + each estimators. + + Returns + ------- + average_path_length : ndarray of shape (n_samples,) + """ + + # n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False) + + n_samples_leaf_shape = n_samples_leaf.shape + n_samples_leaf = n_samples_leaf.reshape((1, -1)) + average_path_length = np.zeros(n_samples_leaf.shape) + + mask_1 = n_samples_leaf <= 1 + mask_2 = n_samples_leaf == 2 + not_mask = ~np.logical_or(mask_1, mask_2) + + average_path_length[mask_1] = 0. + average_path_length[mask_2] = 1. + average_path_length[not_mask] = ( + 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma) + - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask] + ) + + return average_path_length.reshape(n_samples_leaf_shape) + +def complex_list_idx(target_list, component): + for i, x in enumerate(target_list): + if np.all(x==component): + # print(i) + return i + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): + if tree_num == num_trees: + vote = 0 + for t in range(num_trees): + vote += (leaf_info["tree "+str(t)][vote_list[t]][0] + _average_path_length(leaf_info["tree "+str(t)][vote_list[t]][1])) + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] + # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + if vote >= path_len_threshold*num_trees: + g_table['votes to class'][num]['class'] = 0 + else: + g_table['votes to class'][num]['class'] = 1 + + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) + tree_num -= 1 + return g_table, num + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + num_features = Planter_config['data config']['number of features'] + num_samples = Planter_config['model config']['number of samples'] + num_classes = Planter_config['model config']['number of classes'] + # num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.5, 2)) + print("The threshold of path length is %.2f" % path_len_threshold) + # max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + + + rng = np.random.RandomState(42) + + + + # fit the model + clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) + clf.fit(train_X) + + clf.decision_function(train_X) + + y_pred_test = clf.predict(test_X) + sklearn_y_predict = copy.deepcopy(y_pred_test) + + for i in range(len(y_pred_test)): + if y_pred_test[i] == -1: + sklearn_y_predict[i] = 1 + if y_pred_test[i] == 1: + sklearn_y_predict[i] = 0 + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + leaf_info = {} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx, estimator in enumerate(clf.estimators_): + g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) + + + g_table['votes to class'] = {} + print("\nGenerating vote to class table...", end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) + print('Done') + + for t in range(num_trees): + leaf_info['tree ' + str(t)] = list(leaf_info['tree ' + str(t)]) + for i, x in enumerate(leaf_info['tree ' + str(t)]): + leaf_info['tree ' + str(t)][i] = str(list(x)) + + for t in range(num_trees): + for k in g_table[t]['code to vote'].keys(): + g_table[t]['code to vote'][k]['leaf'] = leaf_info['tree ' + str(t)].index(str(list(g_table[t]['code to vote'][k]['leaf']))) + + for k in g_table['votes to class'].keys(): + for t in range(num_trees): + g_table['votes to class'][k]['t'+str(t)+' vote'] = leaf_info['tree ' + str(t)].index(str(list(g_table['votes to class'][k]['t'+str(t)+' vote']))) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros((num_trees, num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = int(np.ceil(math.log( + g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature ' + str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i != 0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + print('Done') + + table_name = 'LPM_Table.json' + json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) + print('LPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "IF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + +def test_tables(sklearn_test_y, test_X, test_y): + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list) != str(lpm_code_list): + print('error in exact to ternary match', code_list, lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), + f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), + end=" ") + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/IF/Type_2/dedicated_p4.py b/src/models/IF/Type_2/dedicated_p4.py index 2ead009..19a0cc3 100755 --- a/src/models/IF/Type_2/dedicated_p4.py +++ b/src/models/IF/Type_2/dedicated_p4.py @@ -1,317 +1,317 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + - "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + + "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - + int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/IF/Type_2/readme.md b/src/models/IF/Type_2/readme.md index 6dff52f..a9901f8 100644 --- a/src/models/IF/Type_2/readme.md +++ b/src/models/IF/Type_2/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/IF/Type_2/table_generator.py b/src/models/IF/Type_2/table_generator.py index 8dfaff1..ec1ac14 100755 --- a/src/models/IF/Type_2/table_generator.py +++ b/src/models/IF/Type_2/table_generator.py @@ -1,554 +1,554 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import matplotlib.pyplot as plt -from sklearn.ensemble import IsolationForest -import math -import json -import copy -from sklearn.metrics import * -import re -from src.functions.json_encoder import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -from src.functions.Muti_Exact_to_LPM import * - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - n_node_samples = tree.tree_.n_node_samples - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - # print(node) - ind = n_node_samples[node] - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - num_nodes = 0 - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - num_nodes += 1 - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num_nodes) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - leaf_info['tree '+str(tree_index)]= np.unique(classfication) - return g_table, leaf_info - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): - if tree_num == num_trees: - vote = 0 - for t in range(num_trees): - vote += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] - # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - if vote >= path_len_threshold*num_trees: - g_table['votes to class'][num]['class'] = 0 - else: - g_table['votes to class'][num]['class'] = 1 - - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) - tree_num -= 1 - return g_table, num - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - num_features = Planter_config['data config']['number of features'] - num_samples = Planter_config['model config']['number of samples'] - num_classes = Planter_config['model config']['number of classes'] - - num_trees = Planter_config['model config']['number of trees'] - path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.6, 2)) - print("The threshold of path length is %.2f" % path_len_threshold) - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - - rng = np.random.RandomState(42) - - - - # fit the model - clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) - clf.fit(train_X) - - clf.decision_function(train_X) - - y_pred_test = clf.predict(test_X) - sklearn_y_predict = copy.deepcopy(y_pred_test) - - for i in range(len(y_pred_test)): - if y_pred_test[i] == -1: - sklearn_y_predict[i] = 1 - if y_pred_test[i] == 1: - sklearn_y_predict[i] = 0 - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - leaf_info = {} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx, estimator in enumerate(clf.estimators_): - g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) - - - - g_table['votes to class'] = {} - print("\nGenerating vote to class table...", end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) - print('Done') - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros((num_trees, num_features)) - for i in range(num_features): - for tree in range(num_trees): - - code_width_tree_feature[tree, i] = int(np.ceil(math.log( - g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature ' + str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i != 0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - - table_name = 'LPM_Table.json' - json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) - print('\nLPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "IF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - -def test_tables(sklearn_test_y, test_X, test_y): - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list) != str(lpm_code_list): - print('error in exact to ternary match', code_list, lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), - f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), - end=" ") - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +import math +import json +import copy +from sklearn.metrics import * +import re +from src.functions.json_encoder import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +from src.functions.Muti_Exact_to_LPM import * + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + n_node_samples = tree.tree_.n_node_samples + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + # print(node) + ind = n_node_samples[node] + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + num_nodes = 0 + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + num_nodes += 1 + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num_nodes) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + leaf_info['tree '+str(tree_index)]= np.unique(classfication) + return g_table, leaf_info + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): + if tree_num == num_trees: + vote = 0 + for t in range(num_trees): + vote += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] + # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + if vote >= path_len_threshold*num_trees: + g_table['votes to class'][num]['class'] = 0 + else: + g_table['votes to class'][num]['class'] = 1 + + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) + tree_num -= 1 + return g_table, num + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + num_features = Planter_config['data config']['number of features'] + num_samples = Planter_config['model config']['number of samples'] + num_classes = Planter_config['model config']['number of classes'] + + num_trees = Planter_config['model config']['number of trees'] + path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.6, 2)) + print("The threshold of path length is %.2f" % path_len_threshold) + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + + rng = np.random.RandomState(42) + + + + # fit the model + clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) + clf.fit(train_X) + + clf.decision_function(train_X) + + y_pred_test = clf.predict(test_X) + sklearn_y_predict = copy.deepcopy(y_pred_test) + + for i in range(len(y_pred_test)): + if y_pred_test[i] == -1: + sklearn_y_predict[i] = 1 + if y_pred_test[i] == 1: + sklearn_y_predict[i] = 0 + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + leaf_info = {} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx, estimator in enumerate(clf.estimators_): + g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) + + + + g_table['votes to class'] = {} + print("\nGenerating vote to class table...", end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) + print('Done') + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros((num_trees, num_features)) + for i in range(num_features): + for tree in range(num_trees): + + code_width_tree_feature[tree, i] = int(np.ceil(math.log( + g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature ' + str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i != 0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + + table_name = 'LPM_Table.json' + json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) + print('\nLPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "IF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + +def test_tables(sklearn_test_y, test_X, test_y): + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list) != str(lpm_code_list): + print('error in exact to ternary match', code_list, lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), + f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), + end=" ") + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/IF/Type_EB/dedicated_p4.py b/src/models/IF/Type_EB/dedicated_p4.py index eb4699d..5aead3c 100755 --- a/src/models/IF/Type_EB/dedicated_p4.py +++ b/src/models/IF/Type_EB/dedicated_p4.py @@ -1,317 +1,317 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + - "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + + "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - + int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/IF/Type_EB/readme.md b/src/models/IF/Type_EB/readme.md index 6dff52f..a9901f8 100644 --- a/src/models/IF/Type_EB/readme.md +++ b/src/models/IF/Type_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/IF/Type_EB/table_generator.py b/src/models/IF/Type_EB/table_generator.py index bfc6d74..2747149 100755 --- a/src/models/IF/Type_EB/table_generator.py +++ b/src/models/IF/Type_EB/table_generator.py @@ -1,613 +1,613 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import matplotlib.pyplot as plt -from sklearn.ensemble import IsolationForest -import math -import json -import copy -from sklearn.metrics import * -import re -from src.functions.json_encoder import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -from src.functions.Muti_Exact_to_LPM import * - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - n_node_samples = tree.tree_.n_node_samples - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - # print(node) - ind = n_node_samples[node] - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - num_nodes = 0 - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - num_nodes += 1 - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append([num_nodes, int(num[i][a - 1])]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - # print(classfication) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - leaf_info['tree '+str(tree_index)]= np.unique(classfication, axis=0) - return g_table, leaf_info - -def _average_path_length(n_samples_leaf): - """ - The average path length in a n_samples iTree, which is equal to - the average path length of an unsuccessful BST search since the - latter has the same structure as an isolation tree. - Parameters - ---------- - n_samples_leaf : array-like of shape (n_samples,) - The number of training samples in each test sample leaf, for - each estimators. - - Returns - ------- - average_path_length : ndarray of shape (n_samples,) - """ - - # n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False) - - n_samples_leaf_shape = n_samples_leaf.shape - n_samples_leaf = n_samples_leaf.reshape((1, -1)) - average_path_length = np.zeros(n_samples_leaf.shape) - - mask_1 = n_samples_leaf <= 1 - mask_2 = n_samples_leaf == 2 - not_mask = ~np.logical_or(mask_1, mask_2) - - average_path_length[mask_1] = 0. - average_path_length[mask_2] = 1. - average_path_length[not_mask] = ( - 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma) - - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask] - ) - - return average_path_length.reshape(n_samples_leaf_shape) - -def complex_list_idx(target_list, component): - for i, x in enumerate(target_list): - if np.all(x==component): - # print(i) - return i - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): - if tree_num == num_trees: - vote = 0 - for t in range(num_trees): - vote += (leaf_info["tree "+str(t)][vote_list[t]][0] + _average_path_length(leaf_info["tree "+str(t)][vote_list[t]][1])) - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] - # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - if vote >= path_len_threshold*num_trees: - g_table['votes to class'][num]['class'] = 0 - else: - g_table['votes to class'][num]['class'] = 1 - # g_table['votes to class'][num]['class'] = vote - # print(g_table['votes to class'][num]) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) - tree_num -= 1 - return g_table, num - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - num_features = Planter_config['data config']['number of features'] - num_samples = Planter_config['model config']['number of samples'] - num_classes = Planter_config['model config']['number of classes'] - # num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.5, 2)) - print("The threshold of path length is %.2f" % path_len_threshold) - # max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - - - rng = np.random.RandomState(42) - - - - # fit the model - clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) - clf.fit(train_X) - - clf.decision_function(train_X) - - y_pred_test = clf.predict(test_X) - sklearn_y_predict = copy.deepcopy(y_pred_test) - - for i in range(len(y_pred_test)): - if y_pred_test[i] == -1: - sklearn_y_predict[i] = 1 - if y_pred_test[i] == 1: - sklearn_y_predict[i] = 0 - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - leaf_info = {} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx, estimator in enumerate(clf.estimators_): - g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) - - - g_table['votes to class'] = {} - print("\nGenerating vote to class table...", end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) - print('Done') - - for t in range(num_trees): - leaf_info['tree ' + str(t)] = list(leaf_info['tree ' + str(t)]) - for i, x in enumerate(leaf_info['tree ' + str(t)]): - leaf_info['tree ' + str(t)][i] = str(list(x)) - - for t in range(num_trees): - for k in g_table[t]['code to vote'].keys(): - g_table[t]['code to vote'][k]['leaf'] = leaf_info['tree ' + str(t)].index(str(list(g_table[t]['code to vote'][k]['leaf']))) - - for k in g_table['votes to class'].keys(): - for t in range(num_trees): - g_table['votes to class'][k]['t'+str(t)+' vote'] = leaf_info['tree ' + str(t)].index(str(list(g_table['votes to class'][k]['t'+str(t)+' vote']))) - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros((num_trees, num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = int(np.ceil(math.log( - g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature ' + str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i != 0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - print('Done') - - table_name = 'LPM_Table.json' - json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) - print('LPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "IF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - -def test_tables(sklearn_test_y, test_X, test_y): - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list) != str(lpm_code_list): - print('error in exact to ternary match', code_list, lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), - f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), - end=" ") - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +import math +import json +import copy +from sklearn.metrics import * +import re +from src.functions.json_encoder import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +from src.functions.Muti_Exact_to_LPM import * + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + n_node_samples = tree.tree_.n_node_samples + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + # print(node) + ind = n_node_samples[node] + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + num_nodes = 0 + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + num_nodes += 1 + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append([num_nodes, int(num[i][a - 1])]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + # print(classfication) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + leaf_info['tree '+str(tree_index)]= np.unique(classfication, axis=0) + return g_table, leaf_info + +def _average_path_length(n_samples_leaf): + """ + The average path length in a n_samples iTree, which is equal to + the average path length of an unsuccessful BST search since the + latter has the same structure as an isolation tree. + Parameters + ---------- + n_samples_leaf : array-like of shape (n_samples,) + The number of training samples in each test sample leaf, for + each estimators. + + Returns + ------- + average_path_length : ndarray of shape (n_samples,) + """ + + # n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False) + + n_samples_leaf_shape = n_samples_leaf.shape + n_samples_leaf = n_samples_leaf.reshape((1, -1)) + average_path_length = np.zeros(n_samples_leaf.shape) + + mask_1 = n_samples_leaf <= 1 + mask_2 = n_samples_leaf == 2 + not_mask = ~np.logical_or(mask_1, mask_2) + + average_path_length[mask_1] = 0. + average_path_length[mask_2] = 1. + average_path_length[not_mask] = ( + 2.0 * (np.log(n_samples_leaf[not_mask] - 1.0) + np.euler_gamma) + - 2.0 * (n_samples_leaf[not_mask] - 1.0) / n_samples_leaf[not_mask] + ) + + return average_path_length.reshape(n_samples_leaf_shape) + +def complex_list_idx(target_list, component): + for i, x in enumerate(target_list): + if np.all(x==component): + # print(i) + return i + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): + if tree_num == num_trees: + vote = 0 + for t in range(num_trees): + vote += (leaf_info["tree "+str(t)][vote_list[t]][0] + _average_path_length(leaf_info["tree "+str(t)][vote_list[t]][1])) + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] + # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + if vote >= path_len_threshold*num_trees: + g_table['votes to class'][num]['class'] = 0 + else: + g_table['votes to class'][num]['class'] = 1 + # g_table['votes to class'][num]['class'] = vote + # print(g_table['votes to class'][num]) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) + tree_num -= 1 + return g_table, num + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + num_features = Planter_config['data config']['number of features'] + num_samples = Planter_config['model config']['number of samples'] + num_classes = Planter_config['model config']['number of classes'] + # num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.5, 2)) + print("The threshold of path length is %.2f" % path_len_threshold) + # max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + + + rng = np.random.RandomState(42) + + + + # fit the model + clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) + clf.fit(train_X) + + clf.decision_function(train_X) + + y_pred_test = clf.predict(test_X) + sklearn_y_predict = copy.deepcopy(y_pred_test) + + for i in range(len(y_pred_test)): + if y_pred_test[i] == -1: + sklearn_y_predict[i] = 1 + if y_pred_test[i] == 1: + sklearn_y_predict[i] = 0 + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + leaf_info = {} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx, estimator in enumerate(clf.estimators_): + g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) + + + g_table['votes to class'] = {} + print("\nGenerating vote to class table...", end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) + print('Done') + + for t in range(num_trees): + leaf_info['tree ' + str(t)] = list(leaf_info['tree ' + str(t)]) + for i, x in enumerate(leaf_info['tree ' + str(t)]): + leaf_info['tree ' + str(t)][i] = str(list(x)) + + for t in range(num_trees): + for k in g_table[t]['code to vote'].keys(): + g_table[t]['code to vote'][k]['leaf'] = leaf_info['tree ' + str(t)].index(str(list(g_table[t]['code to vote'][k]['leaf']))) + + for k in g_table['votes to class'].keys(): + for t in range(num_trees): + g_table['votes to class'][k]['t'+str(t)+' vote'] = leaf_info['tree ' + str(t)].index(str(list(g_table['votes to class'][k]['t'+str(t)+' vote']))) + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros((num_trees, num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = int(np.ceil(math.log( + g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature ' + str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i != 0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + print('Done') + + table_name = 'LPM_Table.json' + json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) + print('LPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "IF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + +def test_tables(sklearn_test_y, test_X, test_y): + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list) != str(lpm_code_list): + print('error in exact to ternary match', code_list, lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), + f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), + end=" ") + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/IF/Type_Simplified_EB/dedicated_p4.py b/src/models/IF/Type_Simplified_EB/dedicated_p4.py index eb4699d..5aead3c 100755 --- a/src/models/IF/Type_Simplified_EB/dedicated_p4.py +++ b/src/models/IF/Type_Simplified_EB/dedicated_p4.py @@ -1,317 +1,317 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + - "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + + "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - + int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/IF/Type_Simplified_EB/readme.md b/src/models/IF/Type_Simplified_EB/readme.md index 6dff52f..a9901f8 100644 --- a/src/models/IF/Type_Simplified_EB/readme.md +++ b/src/models/IF/Type_Simplified_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for IF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/IF/Type_Simplified_EB/table_generator.py b/src/models/IF/Type_Simplified_EB/table_generator.py index 7bfd676..7ff528a 100755 --- a/src/models/IF/Type_Simplified_EB/table_generator.py +++ b/src/models/IF/Type_Simplified_EB/table_generator.py @@ -1,558 +1,558 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import matplotlib.pyplot as plt -from sklearn.ensemble import IsolationForest -import math -import json -import copy -from sklearn.metrics import * -import re -from src.functions.json_encoder import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -from src.functions.Muti_Exact_to_LPM import * - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - n_node_samples = tree.tree_.n_node_samples - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - # print(node) - ind = n_node_samples[node] - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - num_nodes = 0 - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - num_nodes += 1 - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num_nodes) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - # print(classfication) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - leaf_info['tree '+str(tree_index)]= np.unique(classfication) - return g_table, leaf_info - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): - if tree_num == num_trees: - vote = 0 - for t in range(num_trees): - vote += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] - # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - if vote >= path_len_threshold*num_trees: - g_table['votes to class'][num]['class'] = 0 - else: - g_table['votes to class'][num]['class'] = 1 - # g_table['votes to class'][num]['class'] = vote - # print(g_table['votes to class'][num]) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) - tree_num -= 1 - return g_table, num - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - num_features = Planter_config['data config']['number of features'] - num_samples = Planter_config['model config']['number of samples'] - num_classes = Planter_config['model config']['number of classes'] - # num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.6, 2)) - print("The threshold of path length is %.2f" % path_len_threshold) - # max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - rng = np.random.RandomState(42) - - - - # fit the model - clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) - clf.fit(train_X) - - clf.decision_function(train_X) - - y_pred_test = clf.predict(test_X) - sklearn_y_predict = copy.deepcopy(y_pred_test) - - for i in range(len(y_pred_test)): - if y_pred_test[i] == -1: - sklearn_y_predict[i] = 1 - if y_pred_test[i] == 1: - sklearn_y_predict[i] = 0 - - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - g_table = {} - leaf_info = {} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx, estimator in enumerate(clf.estimators_): - g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) - - - - g_table['votes to class'] = {} - print("\nGenerating vote to class table...", end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) - print('Done') - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - code_width_tree_feature = np.zeros((num_trees, num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = int(np.ceil(math.log( - g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, - 2) + 1)) or 1 - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature ' + str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i != 0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - - table_name = 'LPM_Table.json' - json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) - print('\nLPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "IF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - -def test_tables(sklearn_test_y, test_X, test_y): - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list) != str(lpm_code_list): - print('error in exact to ternary match', code_list, lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i != 0: - print( - '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), - f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), - end=" ") - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import matplotlib.pyplot as plt +from sklearn.ensemble import IsolationForest +import math +import json +import copy +from sklearn.metrics import * +import re +from src.functions.json_encoder import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +from src.functions.Muti_Exact_to_LPM import * + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + n_node_samples = tree.tree_.n_node_samples + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + # print(node) + ind = n_node_samples[node] + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + num_nodes = 0 + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + num_nodes += 1 + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num_nodes) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max, leaf_info): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + # print(classfication) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + leaf_info['tree '+str(tree_index)]= np.unique(classfication) + return g_table, leaf_info + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold): + if tree_num == num_trees: + vote = 0 + for t in range(num_trees): + vote += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = leaf_info["tree "+str(t)][vote_list[t]] + # g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + if vote >= path_len_threshold*num_trees: + g_table['votes to class'][num]['class'] = 0 + else: + g_table['votes to class'][num]['class'] = 1 + # g_table['votes to class'][num]['class'] = vote + # print(g_table['votes to class'][num]) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info, path_len_threshold) + tree_num -= 1 + return g_table, num + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of samples'] = int(input('- Number of samples? (default = 128) ') or '128') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + num_features = Planter_config['data config']['number of features'] + num_samples = Planter_config['model config']['number of samples'] + num_classes = Planter_config['model config']['number of classes'] + # num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + path_len_threshold = (2 * (np.log(num_samples - 1) + np.euler_gamma) - (2 * (num_samples - 1) / num_samples)) * (-math.log(0.6, 2)) + print("The threshold of path length is %.2f" % path_len_threshold) + # max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + rng = np.random.RandomState(42) + + + + # fit the model + clf = IsolationForest( n_estimators= num_trees, max_samples=num_samples, random_state=rng) + clf.fit(train_X) + + clf.decision_function(train_X) + + y_pred_test = clf.predict(test_X) + sklearn_y_predict = copy.deepcopy(y_pred_test) + + for i in range(len(y_pred_test)): + if y_pred_test[i] == -1: + sklearn_y_predict[i] = 1 + if y_pred_test[i] == 1: + sklearn_y_predict[i] = 0 + + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + g_table = {} + leaf_info = {} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx, estimator in enumerate(clf.estimators_): + g_table, leaf_info = generate_table(estimator, idx, num_features, g_table, feature_max, leaf_info) + + + + g_table['votes to class'] = {} + print("\nGenerating vote to class table...", end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info, path_len_threshold) + print('Done') + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + code_width_tree_feature = np.zeros((num_trees, num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = int(np.ceil(math.log( + g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))] + 1, + 2) + 1)) or 1 + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature ' + str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature " + str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i != 0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature ' + str(i)] = Table_to_LPM(LPM_Table['feature ' + str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + + table_name = 'LPM_Table.json' + json.dump(LPM_Table, open('Tables/' + table_name, 'w'), indent=4, cls=NpEncoder) + print('\nLPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4, cls=NpEncoder) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "IF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + +def test_tables(sklearn_test_y, test_X, test_y): + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list) != str(lpm_code_list): + print('error in exact to ternary match', code_list, lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i != 0: + print( + '\rswitch_prediction: {}, sklearn: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, sklearn_test_y[i], test_y[i], correct / (i + 1), same / (i + 1), + f1_score(switch_test_y[:i], test_y[:i]), f1_score(sklearn_test_y[:i], test_y[:i])), + end=" ") + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) diff --git a/src/models/IF/readme.md b/src/models/IF/readme.md index a87c3ca..aa26f53 100644 --- a/src/models/IF/readme.md +++ b/src/models/IF/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported IF. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported IF. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KM/Type_1/dedicated_p4.py b/src/models/KM/Type_1/dedicated_p4.py index f83015d..00f37bf 100755 --- a/src/models/KM/Type_1/dedicated_p4.py +++ b/src/models/KM/Type_1/dedicated_p4.py @@ -1,300 +1,300 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> middle_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) - ingress.write("){\n") - if f==0: - for c in range(0, config['num_classes']): - ingress.write( " meta.middle_c" + str(c) + " = f" + str(f) + "c" + str( c) + ";\n") - else: - for c in range(0, config['num_classes']): - ingress.write(" meta.middle_c" + str(c)+" = meta.middle_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) - +" = meta.middle_c" +str(int(compare[1])) +" - meta.middle_c"+str(int(compare[0]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"meta.result = "+str(int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - # print(con_list, c_n) - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" - +(config['num_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - # ingress.write(" class_prob.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" compare();\n\n") - - - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature" + str(f) - Entry["match"] = {} - Entry["match"]["meta.feature" + str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["f" + str(f) + "c" + str(c)] = Exact_Table['feature ' + str(f)][idx]["c" + str(c)] - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(key) + " => ") - for c in range(num_classes): - file.write(str(Exact_Table['feature ' + str(f)][idx]["c" + str(c)]) + " ") - file.write("\n") - file.write("\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in (table['feature " + str(f) + "'].keys()):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['c" + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['c"+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> middle_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) + ingress.write("){\n") + if f==0: + for c in range(0, config['num_classes']): + ingress.write( " meta.middle_c" + str(c) + " = f" + str(f) + "c" + str( c) + ";\n") + else: + for c in range(0, config['num_classes']): + ingress.write(" meta.middle_c" + str(c)+" = meta.middle_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) + +" = meta.middle_c" +str(int(compare[1])) +" - meta.middle_c"+str(int(compare[0]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"meta.result = "+str(int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + # print(con_list, c_n) + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" + +(config['num_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + # ingress.write(" class_prob.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" compare();\n\n") + + + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature" + str(f) + Entry["match"] = {} + Entry["match"]["meta.feature" + str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["f" + str(f) + "c" + str(c)] = Exact_Table['feature ' + str(f)][idx]["c" + str(c)] + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(key) + " => ") + for c in range(num_classes): + file.write(str(Exact_Table['feature ' + str(f)][idx]["c" + str(c)]) + " ") + file.write("\n") + file.write("\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in (table['feature " + str(f) + "'].keys()):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['c" + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['c"+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + + + + diff --git a/src/models/KM/Type_1/readme.md b/src/models/KM/Type_1/readme.md index f63d182..2114aef 100644 --- a/src/models/KM/Type_1/readme.md +++ b/src/models/KM/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KM/Type_1/table_generator.py b/src/models/KM/Type_1/table_generator.py index 554f867..524a929 100755 --- a/src/models/KM/Type_1/table_generator.py +++ b/src/models/KM/Type_1/table_generator.py @@ -1,217 +1,217 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.metrics import * -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -import os -import sys -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * - - -def run_model(train_X, train_y, test_X, test_y, used_features): - - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 5) ') or '5') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - random_state = Planter_config['model config']['random state'] - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t) + 1] - # print(feature_max) - - feature_min = [] - for i in feature_names: - t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] - feature_min += [np.min(t_t) ] - # print(feature_min) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # kmeans fit - kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) - # kmeans = KMeans(n_clusters=num_classes, n_init=random_state).fit(train_X,train_y) - - - sklearn_y_predict = kmeans.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - centre = kmeans.cluster_centers_ - - # record the model - outputfile = 'src/temp/Kmeans.txt' - centers = {} - model = open(outputfile,"w+") - for c in range(len(centre)): - model.write("centre point for class "+str(c)+" : \n") - centers["c"+str(c)]={} - model.write("(") - for f in range(num_features): - centers["c"+str(c)]['f'+str(f)] = centre[c][f] - if f+1>=num_features: - model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") - else: - model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) - model.write(";\n") - model.close() - - Tables= {} - value_info = {} - value_info["max"] = 0 - for f in range(num_features): - Tables['feature ' + str(f)] = {} - x_m = np.mean(train_X[feature_names[f]]) - x_std = np.std(train_X[feature_names[f]]) - for input_value in range(feature_min[f], feature_max[f]): - Tables['feature '+str(f)][input_value] = {} - for c in range(num_classes): - value = (centers["c"+str(c)]['f'+str(f)] - input_value) ** 2 - Tables['feature ' + str(f)][input_value]["c" + str(c)] = value - if value>value_info["max"]: - value_info["max"] = value - - scale = (2**num_bits)/ (value_info["max"]*num_features) - - Exact_Table = {} - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for input_value in range(feature_min[f], feature_max[f]): - Exact_Table['feature ' + str(f)][input_value] = {} - for c in range(num_classes): - value = copy.deepcopy(Tables['feature ' + str(f)][input_value]["c" + str(c)]) - value = int(np.floor(value*scale)) - Exact_Table['feature ' + str(f)][input_value]["c" + str(c)] = value - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "KM" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - distance = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - for f in range (num_features): - for c in range(num_classes): - distance[c] += Exact_Table['feature ' + str(f)][str(input_feature_value[f])]["c"+str(c)] - - - switch_prediction = distance.index(np.min(distance)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - - print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.metrics import * +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +import os +import sys +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * + + +def run_model(train_X, train_y, test_X, test_y, used_features): + + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 5) ') or '5') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + random_state = Planter_config['model config']['random state'] + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t) + 1] + # print(feature_max) + + feature_min = [] + for i in feature_names: + t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] + feature_min += [np.min(t_t) ] + # print(feature_min) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # kmeans fit + kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) + # kmeans = KMeans(n_clusters=num_classes, n_init=random_state).fit(train_X,train_y) + + + sklearn_y_predict = kmeans.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + centre = kmeans.cluster_centers_ + + # record the model + outputfile = 'src/temp/Kmeans.txt' + centers = {} + model = open(outputfile,"w+") + for c in range(len(centre)): + model.write("centre point for class "+str(c)+" : \n") + centers["c"+str(c)]={} + model.write("(") + for f in range(num_features): + centers["c"+str(c)]['f'+str(f)] = centre[c][f] + if f+1>=num_features: + model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") + else: + model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) + model.write(";\n") + model.close() + + Tables= {} + value_info = {} + value_info["max"] = 0 + for f in range(num_features): + Tables['feature ' + str(f)] = {} + x_m = np.mean(train_X[feature_names[f]]) + x_std = np.std(train_X[feature_names[f]]) + for input_value in range(feature_min[f], feature_max[f]): + Tables['feature '+str(f)][input_value] = {} + for c in range(num_classes): + value = (centers["c"+str(c)]['f'+str(f)] - input_value) ** 2 + Tables['feature ' + str(f)][input_value]["c" + str(c)] = value + if value>value_info["max"]: + value_info["max"] = value + + scale = (2**num_bits)/ (value_info["max"]*num_features) + + Exact_Table = {} + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for input_value in range(feature_min[f], feature_max[f]): + Exact_Table['feature ' + str(f)][input_value] = {} + for c in range(num_classes): + value = copy.deepcopy(Tables['feature ' + str(f)][input_value]["c" + str(c)]) + value = int(np.floor(value*scale)) + Exact_Table['feature ' + str(f)][input_value]["c" + str(c)] = value + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "KM" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + distance = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + for f in range (num_features): + for c in range(num_classes): + distance[c] += Exact_Table['feature ' + str(f)][str(input_feature_value[f])]["c"+str(c)] + + + switch_prediction = distance.index(np.min(distance)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + + print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/KM/Type_EB/dedicated_p4.py b/src/models/KM/Type_EB/dedicated_p4.py index ae2a3ba..6be6485 100755 --- a/src/models/KM/Type_EB/dedicated_p4.py +++ b/src/models/KM/Type_EB/dedicated_p4.py @@ -1,179 +1,179 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['tbl_len'] = Planter_config['p4 config']["table length"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n" - # " bit<8> Place_holder;\n" - # "}\n\n") - headers.write(" bit<8> Place_holder;\n" ) - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action extract_label(bit<16> label){\n" - " hdr.Planter.result = (bit<32>) label;\n" - " }\n\n") - - - - - ingress.write(" table lookup_clustream {\n" - " key = { hdr.Planter.feature0:ternary; }\n" - " actions = {\n" - " extract_label();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['tbl_len']) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" lookup_clustream.apply();\n") - - - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for idx in Ternary_Table: - priority = int(idx) - key = Ternary_Table[idx][1] - mask = Ternary_Table[idx][0] - label = Ternary_Table[idx][2] - file.write("table_add SwitchIngress.lookup_clustream extract_label " + - str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - tload.write("print('load clustream table with',len(table.keys()),'entries')\n" - "for idx in table:\n") - tload.write(" key = table[idx][1]\n" - " mask = table[idx][0]\n" - " label = table[idx][2]\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" - " print(idx)\n") - - - - - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['tbl_len'] = Planter_config['p4 config']["table length"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n" + # " bit<8> Place_holder;\n" + # "}\n\n") + headers.write(" bit<8> Place_holder;\n" ) + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action extract_label(bit<16> label){\n" + " hdr.Planter.result = (bit<32>) label;\n" + " }\n\n") + + + + + ingress.write(" table lookup_clustream {\n" + " key = { hdr.Planter.feature0:ternary; }\n" + " actions = {\n" + " extract_label();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['tbl_len']) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" lookup_clustream.apply();\n") + + + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for idx in Ternary_Table: + priority = int(idx) + key = Ternary_Table[idx][1] + mask = Ternary_Table[idx][0] + label = Ternary_Table[idx][2] + file.write("table_add SwitchIngress.lookup_clustream extract_label " + + str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + tload.write("print('load clustream table with',len(table.keys()),'entries')\n" + "for idx in table:\n") + tload.write(" key = table[idx][1]\n" + " mask = table[idx][0]\n" + " label = table[idx][2]\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" + " print(idx)\n") + + + + + + + + + diff --git a/src/models/KM/Type_EB/readme.md b/src/models/KM/Type_EB/readme.md index f63d182..2114aef 100644 --- a/src/models/KM/Type_EB/readme.md +++ b/src/models/KM/Type_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KM/Type_EB/table_generator.py b/src/models/KM/Type_EB/table_generator.py index fb6a53e..60dc47d 100755 --- a/src/models/KM/Type_EB/table_generator.py +++ b/src/models/KM/Type_EB/table_generator.py @@ -1,367 +1,367 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import math - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.metrics import * -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -import os -import sys -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * -from src.functions.numeric_conversion import * - - - - - - -def relative_code_lookup(idx, num_features, feature_num, look_up, label): - if feature_num ==num_features: - code = '' - for f in range(num_features): - code += str(int(idx[f])) - look_up[code] = label - label += 1 - return look_up, label - else: - for r in [0,1]: - idx[feature_num] = r - feature_num+=1 - look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) - feature_num-=1 - return look_up, label - - -def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): - code = '' - need_split = True - while need_split: - if num_depth == depth_num: - break - center = np.zeros(num_features) - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - con = '' - for f in range(num_features): - if x[f]>= center[f]: - con += '1' - border_min[f] = copy.deepcopy(center[f]) - else: - con += '0' - border_max[f] = copy.deepcopy(center[f]) - code += ten_to_bin(lookup[con], num_features) - depth_num += 1 - return code - - - -def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): - if num_features == feature_num: - border_dict[idx] = copy.deepcopy(value_list) - idx+=1 - return border_dict, idx - else: - for i in [0,1]: - if i==0: - value_list[feature_num] = border_min[feature_num] - else: - value_list[feature_num] = border_max[feature_num] - feature_num += 1 - border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) - feature_num -= 1 - return border_dict, idx - - -def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, clf_centers): - not_finish = False - border_max_test = np.zeros(num_features) - border_min_test = np.zeros(num_features) - for f in range(num_features): - - border_min_test[f] = center[f] - width[f] - border_max_test[f] = center[f] + width[f] - - border_list = {} - border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) - # print(border_list) - for idx in border_list: - dis = np.zeros(num_classes).tolist() - for c in range(num_classes): - for f in range(num_features): - dis[c] += (clf_centers['c'+str(c)]['f'+str(f)] - border_list[idx][f])**2 - # print(dis, dis.index(np.min(dis))) - if idx == 0: - cla = dis.index(np.min(dis)) - else: - if cla != dis.index(np.min(dis)): - not_finish = True - cla = 404 - break - # print(clf_centers) - return not_finish, cla - - - - -def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, clf_centers, division, is_return): - cla = 0 - # if not is_return: - is_return == False - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 - - # print('0. center', center, 'width', width,'depth',depth_num,'division', division) - not_finish = True - if depth_num ==0: - not_finish = True - elif depth_num < num_depth: - not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, clf_centers) - else: - not_finish = False - # cla = 404 - _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, clf_centers) - # print('1. ', not_finish, cla) - - if not_finish: - # print('go depper:', depth_num) - for division in lookup: - new_boarder_max = np.zeros(num_features) - new_boarder_mim = np.zeros(num_features) - for f in range(num_features): - if division[f] == '0': - new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) - new_boarder_max[f] = copy.deepcopy(center[f]) - else: - new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) - new_boarder_mim[f] = copy.deepcopy(center[f]) - # print('3. max', new_boarder_max, 'min', new_boarder_mim) - depth_num += 1 - code += ten_to_bin(lookup[division], num_features) - table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, - num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), - copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), clf_centers, division, is_return) - depth_num -=1 - code = code[:-num_features] - else: - mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') - value = code+(num_depth-depth_num)*(num_features*'0') - - table[idx] = [int(mask,2), int(value,2), cla] - idx += 1 - is_return == True - return table, idx, is_return - is_return == True - return table, idx, is_return - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['depth of quadtree'] = int(input('- Number of depth of the quadtree? (default = 2) ') or '2') - Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 4) ') or '4') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - random_state = Planter_config['model config']['random state'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['depth of quadtree'] - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t) + 1] - # print(feature_max) - - feature_min = [] - for i in feature_names: - t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] - feature_min += [np.min(t_t) ] - # print(feature_min) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # kmeans fit - kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) - - - sklearn_y_predict = kmeans.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - centre = kmeans.cluster_centers_ - - # record the model - outputfile = 'src/temp/Kmeans.txt' - centers = {} - model = open(outputfile,"w+") - for c in range(len(centre)): - model.write("centre point for class "+str(c)+" : \n") - centers["c"+str(c)]={} - model.write("(") - for f in range(num_features): - centers["c"+str(c)]['f'+str(f)] = centre[c][f] - if f+1>=num_features: - model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") - else: - model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) - model.write(";\n") - model.close() - - print('Generating Ternary Tables for Clustream K-means ... ', end='') - lookup = {} - lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) - Ternary_Table = {} - Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, centers, '', False) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - - - # ========================== prepare the test data ===================================== - for i in range(np.shape(test_X.values)[0]): - distance = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), - copy.deepcopy(feature_min)) - test_X.values[i][0] = int(code, 2) - - # ======================================================================================= - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "KM" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['model config']['lookup'] = lookup - Planter_config['model config']['feature max'] = feature_max - Planter_config['model config']['feature min'] = feature_min - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - random_state = Planter_config['model config']['random state'] - num_depth = Planter_config['model config']['depth of quadtree'] - lookup = Planter_config['model config']['lookup'] - feature_max = Planter_config['model config']['feature max'] - feature_min = Planter_config['model config']['feature min'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - code = test_X.values[i][0] - - match_or_not = False - - - keys = list(Ternary_Table.keys()) - - for count in keys: - - # if input_feature_value[f] & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: - if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: - switch_prediction = Ternary_Table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import math + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.metrics import * +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +import os +import sys +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * +from src.functions.numeric_conversion import * + + + + + + +def relative_code_lookup(idx, num_features, feature_num, look_up, label): + if feature_num ==num_features: + code = '' + for f in range(num_features): + code += str(int(idx[f])) + look_up[code] = label + label += 1 + return look_up, label + else: + for r in [0,1]: + idx[feature_num] = r + feature_num+=1 + look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) + feature_num-=1 + return look_up, label + + +def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): + code = '' + need_split = True + while need_split: + if num_depth == depth_num: + break + center = np.zeros(num_features) + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + con = '' + for f in range(num_features): + if x[f]>= center[f]: + con += '1' + border_min[f] = copy.deepcopy(center[f]) + else: + con += '0' + border_max[f] = copy.deepcopy(center[f]) + code += ten_to_bin(lookup[con], num_features) + depth_num += 1 + return code + + + +def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): + if num_features == feature_num: + border_dict[idx] = copy.deepcopy(value_list) + idx+=1 + return border_dict, idx + else: + for i in [0,1]: + if i==0: + value_list[feature_num] = border_min[feature_num] + else: + value_list[feature_num] = border_max[feature_num] + feature_num += 1 + border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) + feature_num -= 1 + return border_dict, idx + + +def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, clf_centers): + not_finish = False + border_max_test = np.zeros(num_features) + border_min_test = np.zeros(num_features) + for f in range(num_features): + + border_min_test[f] = center[f] - width[f] + border_max_test[f] = center[f] + width[f] + + border_list = {} + border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) + # print(border_list) + for idx in border_list: + dis = np.zeros(num_classes).tolist() + for c in range(num_classes): + for f in range(num_features): + dis[c] += (clf_centers['c'+str(c)]['f'+str(f)] - border_list[idx][f])**2 + # print(dis, dis.index(np.min(dis))) + if idx == 0: + cla = dis.index(np.min(dis)) + else: + if cla != dis.index(np.min(dis)): + not_finish = True + cla = 404 + break + # print(clf_centers) + return not_finish, cla + + + + +def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, clf_centers, division, is_return): + cla = 0 + # if not is_return: + is_return == False + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 + + # print('0. center', center, 'width', width,'depth',depth_num,'division', division) + not_finish = True + if depth_num ==0: + not_finish = True + elif depth_num < num_depth: + not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, clf_centers) + else: + not_finish = False + # cla = 404 + _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, clf_centers) + # print('1. ', not_finish, cla) + + if not_finish: + # print('go depper:', depth_num) + for division in lookup: + new_boarder_max = np.zeros(num_features) + new_boarder_mim = np.zeros(num_features) + for f in range(num_features): + if division[f] == '0': + new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) + new_boarder_max[f] = copy.deepcopy(center[f]) + else: + new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) + new_boarder_mim[f] = copy.deepcopy(center[f]) + # print('3. max', new_boarder_max, 'min', new_boarder_mim) + depth_num += 1 + code += ten_to_bin(lookup[division], num_features) + table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, + num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), + copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), clf_centers, division, is_return) + depth_num -=1 + code = code[:-num_features] + else: + mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') + value = code+(num_depth-depth_num)*(num_features*'0') + + table[idx] = [int(mask,2), int(value,2), cla] + idx += 1 + is_return == True + return table, idx, is_return + is_return == True + return table, idx, is_return + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['depth of quadtree'] = int(input('- Number of depth of the quadtree? (default = 2) ') or '2') + Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 4) ') or '4') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + random_state = Planter_config['model config']['random state'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['depth of quadtree'] + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t) + 1] + # print(feature_max) + + feature_min = [] + for i in feature_names: + t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] + feature_min += [np.min(t_t) ] + # print(feature_min) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # kmeans fit + kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) + + + sklearn_y_predict = kmeans.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + centre = kmeans.cluster_centers_ + + # record the model + outputfile = 'src/temp/Kmeans.txt' + centers = {} + model = open(outputfile,"w+") + for c in range(len(centre)): + model.write("centre point for class "+str(c)+" : \n") + centers["c"+str(c)]={} + model.write("(") + for f in range(num_features): + centers["c"+str(c)]['f'+str(f)] = centre[c][f] + if f+1>=num_features: + model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") + else: + model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) + model.write(";\n") + model.close() + + print('Generating Ternary Tables for Clustream K-means ... ', end='') + lookup = {} + lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) + Ternary_Table = {} + Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, centers, '', False) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + + + # ========================== prepare the test data ===================================== + for i in range(np.shape(test_X.values)[0]): + distance = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), + copy.deepcopy(feature_min)) + test_X.values[i][0] = int(code, 2) + + # ======================================================================================= + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "KM" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['model config']['lookup'] = lookup + Planter_config['model config']['feature max'] = feature_max + Planter_config['model config']['feature min'] = feature_min + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + random_state = Planter_config['model config']['random state'] + num_depth = Planter_config['model config']['depth of quadtree'] + lookup = Planter_config['model config']['lookup'] + feature_max = Planter_config['model config']['feature max'] + feature_min = Planter_config['model config']['feature min'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + code = test_X.values[i][0] + + match_or_not = False + + + keys = list(Ternary_Table.keys()) + + for count in keys: + + # if input_feature_value[f] & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: + if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: + switch_prediction = Ternary_Table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) + + diff --git a/src/models/KM/Type_LB/dedicated_p4.py b/src/models/KM/Type_LB/dedicated_p4.py index 60f6a17..1baa112 100755 --- a/src/models/KM/Type_LB/dedicated_p4.py +++ b/src/models/KM/Type_LB/dedicated_p4.py @@ -1,308 +1,308 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> middle_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c+1, config['num_classes']): - headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for c in range(0, config['num_classes']): - if c==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) - ingress.write("){\n") - if f==0: - for c in range(0, config['num_classes']): - ingress.write( " meta.middle_c" + str(c) + " = f" + str(f) + "c" + str( c) + ";\n") - else: - for c in range(0, config['num_classes']): - ingress.write(" meta.middle_c" + str(c)+" = meta.middle_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) - +" = meta.middle_c" +str(int(compare[1])) +" - meta.middle_c"+str(int(compare[0]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - # print(con_list, c_n) - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" - +(config['num_bits']-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - # ingress.write(" class_prob.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" compare();\n\n") - - - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - # ingress.write(" hdr.Planter.result = (bit<32>)meta.vote_c1;\n") - # ingress.write(" /* Swap the MAC addresses */\n" - # " bit<48> tmp;\n" - # " tmp = hdr.ethernet.dstAddr;\n" - # " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" - # " hdr.ethernet.srcAddr = tmp;\n" - # # " send(3);\n") - # " send(ig_intr_md.ingress_port);\n") - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature" + str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature" + str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) - Entry["action_params"] = {} - for c in range(num_classes): - Entry["action_params"]["f" + str(f) + "c" + str(c)] = Exact_Table['feature ' + str(f)][idx]["c" + str(c)] - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(key) + " => ") - for c in range(num_classes): - file.write(str(Exact_Table['feature ' + str(f)][idx]["c" + str(c)]) + " ") - file.write("\n") - file.write("\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in (table['feature " + str(f) + "'].keys()):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for c in range(0, config['num_classes']): - if c==0: - tload.write("table['feature " + str(f) + "'][key]['c" + str(c) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['c"+str(c)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> middle_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c+1, config['num_classes']): + headers.write(" bit<" + str(config['num_bits']) + "> compare"+str(c)+"_"+str(c1)+";\n") + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for c in range(0, config['num_classes']): + if c==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "c" + str(c)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"c"+str(c)) + ingress.write("){\n") + if f==0: + for c in range(0, config['num_classes']): + ingress.write( " meta.middle_c" + str(c) + " = f" + str(f) + "c" + str( c) + ";\n") + else: + for c in range(0, config['num_classes']): + ingress.write(" meta.middle_c" + str(c)+" = meta.middle_c" + str(c)+" + f"+str(f)+"c"+str(c) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) + +" = meta.middle_c" +str(int(compare[1])) +" - meta.middle_c"+str(int(compare[0]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + # print(con_list, c_n) + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" + +(config['num_bits']-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + # ingress.write(" class_prob.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" compare();\n\n") + + + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + # ingress.write(" hdr.Planter.result = (bit<32>)meta.vote_c1;\n") + # ingress.write(" /* Swap the MAC addresses */\n" + # " bit<48> tmp;\n" + # " tmp = hdr.ethernet.dstAddr;\n" + # " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" + # " hdr.ethernet.srcAddr = tmp;\n" + # # " send(3);\n") + # " send(ig_intr_md.ingress_port);\n") + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature" + str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature" + str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) + Entry["action_params"] = {} + for c in range(num_classes): + Entry["action_params"]["f" + str(f) + "c" + str(c)] = Exact_Table['feature ' + str(f)][idx]["c" + str(c)] + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(key) + " => ") + for c in range(num_classes): + file.write(str(Exact_Table['feature ' + str(f)][idx]["c" + str(c)]) + " ") + file.write("\n") + file.write("\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in (table['feature " + str(f) + "'].keys()):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for c in range(0, config['num_classes']): + if c==0: + tload.write("table['feature " + str(f) + "'][key]['c" + str(c) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['c"+str(c)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + + + + diff --git a/src/models/KM/Type_LB/readme.md b/src/models/KM/Type_LB/readme.md index f63d182..2114aef 100644 --- a/src/models/KM/Type_LB/readme.md +++ b/src/models/KM/Type_LB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KM/Type_LB/table_generator.py b/src/models/KM/Type_LB/table_generator.py index fcd401a..947d97c 100755 --- a/src/models/KM/Type_LB/table_generator.py +++ b/src/models/KM/Type_LB/table_generator.py @@ -1,218 +1,218 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.metrics import * -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -import os -import sys -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * - - -def run_model(train_X, train_y, test_X, test_y, used_features): - - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 5) ') or '5') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - random_state = Planter_config['model config']['random state'] - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t) + 1] - # print(feature_max) - - feature_min = [] - for i in feature_names: - t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] - feature_min += [np.min(t_t) ] - # print(feature_min) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # kmeans fit - kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) - # kmeans = KMeans(n_clusters=num_classes, n_init=random_state).fit(train_X,train_y) - - - sklearn_y_predict = kmeans.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - centre = kmeans.cluster_centers_ - - # record the model - outputfile = 'src/temp/Kmeans.txt' - centers = {} - model = open(outputfile,"w+") - for c in range(len(centre)): - model.write("centre point for class "+str(c)+" : \n") - centers["c"+str(c)]={} - model.write("(") - for f in range(num_features): - centers["c"+str(c)]['f'+str(f)] = centre[c][f] - if f+1>=num_features: - model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") - else: - model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) - model.write(";\n") - model.close() - - Tables= {} - value_info = {} - value_info["max"] = 0 - for f in range(num_features): - Tables['feature ' + str(f)] = {} - x_m = np.mean(train_X[feature_names[f]]) - x_std = np.std(train_X[feature_names[f]]) - for input_value in range(feature_min[f], feature_max[f]): - Tables['feature '+str(f)][input_value] = {} - for c in range(num_classes): - value = (centers["c"+str(c)]['f'+str(f)] - input_value) ** 2 - Tables['feature ' + str(f)][input_value]["c" + str(c)] = value - if value>value_info["max"]: - value_info["max"] = value - - scale = (2**num_bits)/ (value_info["max"]*num_features) - - Exact_Table = {} - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for input_value in range(feature_min[f], feature_max[f]): - Exact_Table['feature ' + str(f)][input_value] = {} - for c in range(num_classes): - value = copy.deepcopy(Tables['feature ' + str(f)][input_value]["c" + str(c)]) - value = int(np.floor(value*scale)) - Exact_Table['feature ' + str(f)][input_value]["c" + str(c)] = value - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "KM" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - distance = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - for f in range (num_features): - for c in range(num_classes): - distance[c] += Exact_Table['feature ' + str(f)][str(input_feature_value[f])]["c"+str(c)] - - - switch_prediction = distance.index(np.min(distance)) - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - # result = classification_report(test_y, switch_test_y, digits=3) - # print('\n', result) - print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.metrics import * +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +import os +import sys +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * + + +def run_model(train_X, train_y, test_X, test_y, used_features): + + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 5) ') or '5') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + random_state = Planter_config['model config']['random state'] + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t) + 1] + # print(feature_max) + + feature_min = [] + for i in feature_names: + t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] + feature_min += [np.min(t_t) ] + # print(feature_min) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # kmeans fit + kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) + # kmeans = KMeans(n_clusters=num_classes, n_init=random_state).fit(train_X,train_y) + + + sklearn_y_predict = kmeans.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + centre = kmeans.cluster_centers_ + + # record the model + outputfile = 'src/temp/Kmeans.txt' + centers = {} + model = open(outputfile,"w+") + for c in range(len(centre)): + model.write("centre point for class "+str(c)+" : \n") + centers["c"+str(c)]={} + model.write("(") + for f in range(num_features): + centers["c"+str(c)]['f'+str(f)] = centre[c][f] + if f+1>=num_features: + model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") + else: + model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) + model.write(";\n") + model.close() + + Tables= {} + value_info = {} + value_info["max"] = 0 + for f in range(num_features): + Tables['feature ' + str(f)] = {} + x_m = np.mean(train_X[feature_names[f]]) + x_std = np.std(train_X[feature_names[f]]) + for input_value in range(feature_min[f], feature_max[f]): + Tables['feature '+str(f)][input_value] = {} + for c in range(num_classes): + value = (centers["c"+str(c)]['f'+str(f)] - input_value) ** 2 + Tables['feature ' + str(f)][input_value]["c" + str(c)] = value + if value>value_info["max"]: + value_info["max"] = value + + scale = (2**num_bits)/ (value_info["max"]*num_features) + + Exact_Table = {} + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for input_value in range(feature_min[f], feature_max[f]): + Exact_Table['feature ' + str(f)][input_value] = {} + for c in range(num_classes): + value = copy.deepcopy(Tables['feature ' + str(f)][input_value]["c" + str(c)]) + value = int(np.floor(value*scale)) + Exact_Table['feature ' + str(f)][input_value]["c" + str(c)] = value + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "KM" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + distance = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + for f in range (num_features): + for c in range(num_classes): + distance[c] += Exact_Table['feature ' + str(f)][str(input_feature_value[f])]["c"+str(c)] + + + switch_prediction = distance.index(np.min(distance)) + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + # result = classification_report(test_y, switch_test_y, digits=3) + # print('\n', result) + print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/KM/Type_clustreams/dedicated_p4.py b/src/models/KM/Type_clustreams/dedicated_p4.py index e621fb7..bbc3aee 100755 --- a/src/models/KM/Type_clustreams/dedicated_p4.py +++ b/src/models/KM/Type_clustreams/dedicated_p4.py @@ -1,175 +1,175 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['tbl_len'] = Planter_config['p4 config']["table length"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - headers.write(" bit<8> Place_holder;\n" ) - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action extract_label(bit<16> label){\n" - " meta.result = (bit<32>) label;\n" - " }\n\n") - - - - - ingress.write(" table lookup_clustream {\n" - " key = { meta.feature0:ternary; }\n" - " actions = {\n" - " extract_label();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['tbl_len']) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" lookup_clustream.apply();\n") - - - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for idx in Ternary_Table: - priority = int(idx) - key = Ternary_Table[idx][1] - mask = Ternary_Table[idx][0] - label = Ternary_Table[idx][2] - file.write("table_add SwitchIngress.lookup_clustream extract_label " + - str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - tload.write("print('load clustream table with',len(table.keys()),'entries')\n" - "for idx in table:\n") - tload.write(" key = table[idx][1]\n" - " mask = table[idx][0]\n" - " label = table[idx][2]\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" - " print(idx)\n") - - - - - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['tbl_len'] = Planter_config['p4 config']["table length"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + headers.write(" bit<8> Place_holder;\n" ) + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action extract_label(bit<16> label){\n" + " meta.result = (bit<32>) label;\n" + " }\n\n") + + + + + ingress.write(" table lookup_clustream {\n" + " key = { meta.feature0:ternary; }\n" + " actions = {\n" + " extract_label();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['tbl_len']) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" lookup_clustream.apply();\n") + + + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for idx in Ternary_Table: + priority = int(idx) + key = Ternary_Table[idx][1] + mask = Ternary_Table[idx][0] + label = Ternary_Table[idx][2] + file.write("table_add SwitchIngress.lookup_clustream extract_label " + + str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + tload.write("print('load clustream table with',len(table.keys()),'entries')\n" + "for idx in table:\n") + tload.write(" key = table[idx][1]\n" + " mask = table[idx][0]\n" + " label = table[idx][2]\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" + " print(idx)\n") + + + + + + + + + diff --git a/src/models/KM/Type_clustreams/readme.md b/src/models/KM/Type_clustreams/readme.md index f63d182..2114aef 100644 --- a/src/models/KM/Type_clustreams/readme.md +++ b/src/models/KM/Type_clustreams/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KM/Type_clustreams/table_generator.py b/src/models/KM/Type_clustreams/table_generator.py index 2322e75..ef39e34 100755 --- a/src/models/KM/Type_clustreams/table_generator.py +++ b/src/models/KM/Type_clustreams/table_generator.py @@ -1,366 +1,366 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import math - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.metrics import * -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -import os -import sys -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * -from src.functions.numeric_conversion import * - - - - - - -def relative_code_lookup(idx, num_features, feature_num, look_up, label): - if feature_num ==num_features: - code = '' - for f in range(num_features): - code += str(int(idx[f])) - look_up[code] = label - label += 1 - return look_up, label - else: - for r in [0,1]: - idx[feature_num] = r - feature_num+=1 - look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) - feature_num-=1 - return look_up, label - - -def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): - code = '' - need_split = True - while need_split: - if num_depth == depth_num: - break - center = np.zeros(num_features) - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - con = '' - for f in range(num_features): - if x[f]>= center[f]: - con += '1' - border_min[f] = copy.deepcopy(center[f]) - else: - con += '0' - border_max[f] = copy.deepcopy(center[f]) - code += ten_to_bin(lookup[con], num_features) - depth_num += 1 - return code - - - -def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): - if num_features == feature_num: - border_dict[idx] = copy.deepcopy(value_list) - idx+=1 - return border_dict, idx - else: - for i in [0,1]: - if i==0: - value_list[feature_num] = border_min[feature_num] - else: - value_list[feature_num] = border_max[feature_num] - feature_num += 1 - border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) - feature_num -= 1 - return border_dict, idx - - -def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, clf_centers): - not_finish = False - border_max_test = np.zeros(num_features) - border_min_test = np.zeros(num_features) - for f in range(num_features): - - border_min_test[f] = center[f] - width[f] - border_max_test[f] = center[f] + width[f] - - border_list = {} - border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) - # print(border_list) - for idx in border_list: - dis = np.zeros(num_classes).tolist() - for c in range(num_classes): - for f in range(num_features): - dis[c] += (clf_centers['c'+str(c)]['f'+str(f)] - border_list[idx][f])**2 - # print(dis, dis.index(np.min(dis))) - if idx == 0: - cla = dis.index(np.min(dis)) - else: - if cla != dis.index(np.min(dis)): - not_finish = True - cla = 404 - break - # print(clf_centers) - return not_finish, cla - - - - -def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, clf_centers, division, is_return): - cla = 0 - # if not is_return: - is_return == False - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 - - - not_finish = True - if depth_num ==0: - not_finish = True - elif depth_num < num_depth: - not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, clf_centers) - else: - not_finish = False - # cla = 404 - _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, clf_centers) - - - if not_finish: - - for division in lookup: - new_boarder_max = np.zeros(num_features) - new_boarder_mim = np.zeros(num_features) - for f in range(num_features): - if division[f] == '0': - new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) - new_boarder_max[f] = copy.deepcopy(center[f]) - else: - new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) - new_boarder_mim[f] = copy.deepcopy(center[f]) - # print('3. max', new_boarder_max, 'min', new_boarder_mim) - depth_num += 1 - code += ten_to_bin(lookup[division], num_features) - table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, - num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), - copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), clf_centers, division, is_return) - depth_num -=1 - code = code[:-num_features] - else: - mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') - value = code+(num_depth-depth_num)*(num_features*'0') - - table[idx] = [int(mask,2), int(value,2), cla] - idx += 1 - is_return == True - return table, idx, is_return - is_return == True - return table, idx, is_return - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['depth of quadtree'] = int(input('- Number of depth of the quadtree? (default = 2) ') or '2') - Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 4) ') or '4') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - random_state = Planter_config['model config']['random state'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['depth of quadtree'] - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t) + 1] - # print(feature_max) - - feature_min = [] - for i in feature_names: - t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] - feature_min += [np.min(t_t) ] - # print(feature_min) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # kmeans fit - kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) - - - sklearn_y_predict = kmeans.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - centre = kmeans.cluster_centers_ - - # record the model - outputfile = 'src/temp/Kmeans.txt' - centers = {} - model = open(outputfile,"w+") - for c in range(len(centre)): - model.write("centre point for class "+str(c)+" : \n") - centers["c"+str(c)]={} - model.write("(") - for f in range(num_features): - centers["c"+str(c)]['f'+str(f)] = centre[c][f] - if f+1>=num_features: - model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") - else: - model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) - model.write(";\n") - model.close() - - print('Generating Ternary Tables for Clustream K-means ... ', end='') - lookup = {} - lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) - Ternary_Table = {} - Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, centers, '', False) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - - - # ========================== prepare the test data ===================================== - for i in range(np.shape(test_X.values)[0]): - distance = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), - copy.deepcopy(feature_min)) - test_X.values[i][0] = int(code, 2) - - # ======================================================================================= - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "KM" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['model config']['lookup'] = lookup - Planter_config['model config']['feature max'] = feature_max - Planter_config['model config']['feature min'] = feature_min - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - random_state = Planter_config['model config']['random state'] - num_depth = Planter_config['model config']['depth of quadtree'] - lookup = Planter_config['model config']['lookup'] - feature_max = Planter_config['model config']['feature max'] - feature_min = Planter_config['model config']['feature min'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - code = test_X.values[i][0] - - match_or_not = False - - - keys = list(Ternary_Table.keys()) - - for count in keys: - - if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: - switch_prediction = Ternary_Table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import math + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.metrics import * +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +import os +import sys +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * +from src.functions.numeric_conversion import * + + + + + + +def relative_code_lookup(idx, num_features, feature_num, look_up, label): + if feature_num ==num_features: + code = '' + for f in range(num_features): + code += str(int(idx[f])) + look_up[code] = label + label += 1 + return look_up, label + else: + for r in [0,1]: + idx[feature_num] = r + feature_num+=1 + look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) + feature_num-=1 + return look_up, label + + +def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): + code = '' + need_split = True + while need_split: + if num_depth == depth_num: + break + center = np.zeros(num_features) + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + con = '' + for f in range(num_features): + if x[f]>= center[f]: + con += '1' + border_min[f] = copy.deepcopy(center[f]) + else: + con += '0' + border_max[f] = copy.deepcopy(center[f]) + code += ten_to_bin(lookup[con], num_features) + depth_num += 1 + return code + + + +def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): + if num_features == feature_num: + border_dict[idx] = copy.deepcopy(value_list) + idx+=1 + return border_dict, idx + else: + for i in [0,1]: + if i==0: + value_list[feature_num] = border_min[feature_num] + else: + value_list[feature_num] = border_max[feature_num] + feature_num += 1 + border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) + feature_num -= 1 + return border_dict, idx + + +def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, clf_centers): + not_finish = False + border_max_test = np.zeros(num_features) + border_min_test = np.zeros(num_features) + for f in range(num_features): + + border_min_test[f] = center[f] - width[f] + border_max_test[f] = center[f] + width[f] + + border_list = {} + border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) + # print(border_list) + for idx in border_list: + dis = np.zeros(num_classes).tolist() + for c in range(num_classes): + for f in range(num_features): + dis[c] += (clf_centers['c'+str(c)]['f'+str(f)] - border_list[idx][f])**2 + # print(dis, dis.index(np.min(dis))) + if idx == 0: + cla = dis.index(np.min(dis)) + else: + if cla != dis.index(np.min(dis)): + not_finish = True + cla = 404 + break + # print(clf_centers) + return not_finish, cla + + + + +def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, clf_centers, division, is_return): + cla = 0 + # if not is_return: + is_return == False + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 + + + not_finish = True + if depth_num ==0: + not_finish = True + elif depth_num < num_depth: + not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, clf_centers) + else: + not_finish = False + # cla = 404 + _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, clf_centers) + + + if not_finish: + + for division in lookup: + new_boarder_max = np.zeros(num_features) + new_boarder_mim = np.zeros(num_features) + for f in range(num_features): + if division[f] == '0': + new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) + new_boarder_max[f] = copy.deepcopy(center[f]) + else: + new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) + new_boarder_mim[f] = copy.deepcopy(center[f]) + # print('3. max', new_boarder_max, 'min', new_boarder_mim) + depth_num += 1 + code += ten_to_bin(lookup[division], num_features) + table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, + num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), + copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), clf_centers, division, is_return) + depth_num -=1 + code = code[:-num_features] + else: + mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') + value = code+(num_depth-depth_num)*(num_features*'0') + + table[idx] = [int(mask,2), int(value,2), cla] + idx += 1 + is_return == True + return table, idx, is_return + is_return == True + return table, idx, is_return + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['depth of quadtree'] = int(input('- Number of depth of the quadtree? (default = 2) ') or '2') + Planter_config['model config']['random state'] = int(input('- What is the model random state? (default = 4) ') or '4') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + random_state = Planter_config['model config']['random state'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['depth of quadtree'] + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t) + 1] + # print(feature_max) + + feature_min = [] + for i in feature_names: + t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] + feature_min += [np.min(t_t) ] + # print(feature_min) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # kmeans fit + kmeans = KMeans(n_clusters=num_classes, random_state=random_state, n_init=random_state).fit(train_X,train_y) + + + sklearn_y_predict = kmeans.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + centre = kmeans.cluster_centers_ + + # record the model + outputfile = 'src/temp/Kmeans.txt' + centers = {} + model = open(outputfile,"w+") + for c in range(len(centre)): + model.write("centre point for class "+str(c)+" : \n") + centers["c"+str(c)]={} + model.write("(") + for f in range(num_features): + centers["c"+str(c)]['f'+str(f)] = centre[c][f] + if f+1>=num_features: + model.write('f' + str(f) + ': ' + str(centre[c][f]) + ")") + else: + model.write( 'f'+str(f)+': '+str(centre[c][f]) + ", " ) + model.write(";\n") + model.close() + + print('Generating Ternary Tables for Clustream K-means ... ', end='') + lookup = {} + lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) + Ternary_Table = {} + Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, centers, '', False) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + + + # ========================== prepare the test data ===================================== + for i in range(np.shape(test_X.values)[0]): + distance = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), + copy.deepcopy(feature_min)) + test_X.values[i][0] = int(code, 2) + + # ======================================================================================= + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "KM" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['model config']['lookup'] = lookup + Planter_config['model config']['feature max'] = feature_max + Planter_config['model config']['feature min'] = feature_min + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + random_state = Planter_config['model config']['random state'] + num_depth = Planter_config['model config']['depth of quadtree'] + lookup = Planter_config['model config']['lookup'] + feature_max = Planter_config['model config']['feature max'] + feature_min = Planter_config['model config']['feature min'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + code = test_X.values[i][0] + + match_or_not = False + + + keys = list(Ternary_Table.keys()) + + for count in keys: + + if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: + switch_prediction = Ternary_Table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) + + diff --git a/src/models/KM/readme.md b/src/models/KM/readme.md index d5b0832..6af8aaf 100644 --- a/src/models/KM/readme.md +++ b/src/models/KM/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported KM. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported KM. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KNN/Type_1/dedicated_p4.py b/src/models/KNN/Type_1/dedicated_p4.py index 2072197..762ca65 100755 --- a/src/models/KNN/Type_1/dedicated_p4.py +++ b/src/models/KNN/Type_1/dedicated_p4.py @@ -1,176 +1,176 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['tbl_len'] = Planter_config['p4 config']["table length"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - - headers.write(" bit<8> Place_holder;\n" ) - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action extract_label(bit<16> label){\n" - " meta.result = (bit<32>) label;\n" - " }\n\n") - - - - - ingress.write(" table lookup_clustream {\n" - " key = { meta.feature0:ternary; }\n" - " actions = {\n" - " extract_label();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['tbl_len']) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" lookup_clustream.apply();\n") - - - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for idx in Ternary_Table: - priority = int(idx) - key = Ternary_Table[idx][1] - mask = Ternary_Table[idx][0] - label = Ternary_Table[idx][2] - file.write("table_add SwitchIngress.lookup_clustream extract_label " + - str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - tload.write("print('load clustream table with',len(table.keys()),'entries')\n" - "for idx in table:\n") - tload.write(" key = table[idx][1]\n" - " mask = table[idx][0]\n" - " label = table[idx][2]\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" - " print(idx)\n") - - - - - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['tbl_len'] = Planter_config['p4 config']["table length"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + + headers.write(" bit<8> Place_holder;\n" ) + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action extract_label(bit<16> label){\n" + " meta.result = (bit<32>) label;\n" + " }\n\n") + + + + + ingress.write(" table lookup_clustream {\n" + " key = { meta.feature0:ternary; }\n" + " actions = {\n" + " extract_label();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['tbl_len']) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" lookup_clustream.apply();\n") + + + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for idx in Ternary_Table: + priority = int(idx) + key = Ternary_Table[idx][1] + mask = Ternary_Table[idx][0] + label = Ternary_Table[idx][2] + file.write("table_add SwitchIngress.lookup_clustream extract_label " + + str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + tload.write("print('load clustream table with',len(table.keys()),'entries')\n" + "for idx in table:\n") + tload.write(" key = table[idx][1]\n" + " mask = table[idx][0]\n" + " label = table[idx][2]\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" + " print(idx)\n") + + + + + + + + + diff --git a/src/models/KNN/Type_1/readme.md b/src/models/KNN/Type_1/readme.md index 95f7ba1..ed86506 100644 --- a/src/models/KNN/Type_1/readme.md +++ b/src/models/KNN/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KNN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KNN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KNN/Type_1/table_generator.py b/src/models/KNN/Type_1/table_generator.py index 5f5abca..7a2cba3 100755 --- a/src/models/KNN/Type_1/table_generator.py +++ b/src/models/KNN/Type_1/table_generator.py @@ -1,342 +1,342 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng, Mingyuan Zang -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import math - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.metrics import * -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -import os -import sys -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * -from src.functions.numeric_conversion import * - - - - - - -def relative_code_lookup(idx, num_features, feature_num, look_up, label): - if feature_num ==num_features: - code = '' - for f in range(num_features): - code += str(int(idx[f])) - look_up[code] = label - label += 1 - return look_up, label - else: - for r in [0,1]: - idx[feature_num] = r - feature_num+=1 - look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) - feature_num-=1 - return look_up, label - - -def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): - code = '' - need_split = True - while need_split: - if num_depth == depth_num: - break - center = np.zeros(num_features) - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - con = '' - for f in range(num_features): - if x[f]>= center[f]: - con += '1' - border_min[f] = copy.deepcopy(center[f]) - else: - con += '0' - border_max[f] = copy.deepcopy(center[f]) - code += ten_to_bin(lookup[con], num_features) - depth_num += 1 - return code - - - -def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): - if num_features == feature_num: - border_dict[idx] = copy.deepcopy(value_list) - idx+=1 - return border_dict, idx - else: - for i in [0,1]: - if i==0: - value_list[feature_num] = border_min[feature_num] - else: - value_list[feature_num] = border_max[feature_num] - feature_num += 1 - border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) - feature_num -= 1 - return border_dict, idx - - -def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, knn_clf): - not_finish = False - border_max_test = np.zeros(num_features) - border_min_test = np.zeros(num_features) - for f in range(num_features): - - border_min_test[f] = center[f] - width[f] - border_max_test[f] = center[f] + width[f] - - border_list = {} - border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) - # print(border_list) - x_border = [] - for idx in border_list: - x_border += [list(border_list[idx])] - - y_border = knn_clf.predict(x_border) - if len(np.unique(y_border)) !=1: - not_finish = True - cla = 404 - else: - cla = np.unique(y_border)[0] - return not_finish, cla - - - - -def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, knn_clf, division, is_return): - cla = 0 - # if not is_return: - is_return == False - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 - - - not_finish = True - if depth_num ==0: - not_finish = True - elif depth_num < num_depth: - not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, knn_clf) - else: - not_finish = False - # cla = 404 - _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, knn_clf) - - - if not_finish: - - for division in lookup: - new_boarder_max = np.zeros(num_features) - new_boarder_mim = np.zeros(num_features) - for f in range(num_features): - if division[f] == '0': - new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) - new_boarder_max[f] = copy.deepcopy(center[f]) - else: - new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) - new_boarder_mim[f] = copy.deepcopy(center[f]) - # print('3. max', new_boarder_max, 'min', new_boarder_mim) - depth_num += 1 - code += ten_to_bin(lookup[division], num_features) - table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, - num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), - copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), knn_clf, division, is_return) - depth_num -=1 - code = code[:-num_features] - else: - mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') - value = code+(num_depth-depth_num)*(num_features*'0') - - table[idx] = [int(mask,2), int(value,2), cla] - idx += 1 - is_return == True - return table, idx, is_return - is_return == True - return table, idx, is_return - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['depth of quadtree'] = np.int(input('- Number of depth of the quadtree? (default = 2) ') or '2') - Planter_config['model config']['number of neighbours'] = np.int(input('- Number of neighbours of the knn? (default = 4) ') or '4') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - num_neighbours = Planter_config['model config']['number of neighbours'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['depth of quadtree'] - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t) + 1] - # print(feature_max) - - feature_min = [] - for i in feature_names: - t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] - feature_min += [np.min(t_t) ] - # print(feature_min) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # knn fit - knn_clf = KNeighborsClassifier(n_neighbors=num_neighbours, algorithm='kd_tree') - knn_clf.fit(train_X, train_y) - - sklearn_y_predict = knn_clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - print('Generating Ternary Tables for Clustream K-means ... ', end='') - lookup = {} - lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) - Ternary_Table = {} - Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, knn_clf, '', False) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4, cls=NpEncoder) - - - # ========================== prepare the test data ===================================== - for i in range(np.shape(test_X.values)[0]): - distance = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), - copy.deepcopy(feature_min)) - test_X.values[i][0] = int(code, 2) - - # ======================================================================================= - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "KNN" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['model config']['lookup'] = lookup - Planter_config['model config']['feature max'] = feature_max - Planter_config['model config']['feature min'] = feature_min - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['depth of quadtree'] - lookup = Planter_config['model config']['lookup'] - feature_max = Planter_config['model config']['feature max'] - feature_min = Planter_config['model config']['feature min'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - code = test_X.values[i][0] - - match_or_not = False - - - keys = list(Ternary_Table.keys()) - - for count in keys: - - if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: - switch_prediction = Ternary_Table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng, Mingyuan Zang +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import math + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.metrics import * +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +import os +import sys +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * +from src.functions.numeric_conversion import * + + + + + + +def relative_code_lookup(idx, num_features, feature_num, look_up, label): + if feature_num ==num_features: + code = '' + for f in range(num_features): + code += str(int(idx[f])) + look_up[code] = label + label += 1 + return look_up, label + else: + for r in [0,1]: + idx[feature_num] = r + feature_num+=1 + look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) + feature_num-=1 + return look_up, label + + +def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): + code = '' + need_split = True + while need_split: + if num_depth == depth_num: + break + center = np.zeros(num_features) + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + con = '' + for f in range(num_features): + if x[f]>= center[f]: + con += '1' + border_min[f] = copy.deepcopy(center[f]) + else: + con += '0' + border_max[f] = copy.deepcopy(center[f]) + code += ten_to_bin(lookup[con], num_features) + depth_num += 1 + return code + + + +def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): + if num_features == feature_num: + border_dict[idx] = copy.deepcopy(value_list) + idx+=1 + return border_dict, idx + else: + for i in [0,1]: + if i==0: + value_list[feature_num] = border_min[feature_num] + else: + value_list[feature_num] = border_max[feature_num] + feature_num += 1 + border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) + feature_num -= 1 + return border_dict, idx + + +def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, knn_clf): + not_finish = False + border_max_test = np.zeros(num_features) + border_min_test = np.zeros(num_features) + for f in range(num_features): + + border_min_test[f] = center[f] - width[f] + border_max_test[f] = center[f] + width[f] + + border_list = {} + border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) + # print(border_list) + x_border = [] + for idx in border_list: + x_border += [list(border_list[idx])] + + y_border = knn_clf.predict(x_border) + if len(np.unique(y_border)) !=1: + not_finish = True + cla = 404 + else: + cla = np.unique(y_border)[0] + return not_finish, cla + + + + +def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, knn_clf, division, is_return): + cla = 0 + # if not is_return: + is_return == False + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 + + + not_finish = True + if depth_num ==0: + not_finish = True + elif depth_num < num_depth: + not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, knn_clf) + else: + not_finish = False + # cla = 404 + _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, knn_clf) + + + if not_finish: + + for division in lookup: + new_boarder_max = np.zeros(num_features) + new_boarder_mim = np.zeros(num_features) + for f in range(num_features): + if division[f] == '0': + new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) + new_boarder_max[f] = copy.deepcopy(center[f]) + else: + new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) + new_boarder_mim[f] = copy.deepcopy(center[f]) + # print('3. max', new_boarder_max, 'min', new_boarder_mim) + depth_num += 1 + code += ten_to_bin(lookup[division], num_features) + table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, + num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), + copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), knn_clf, division, is_return) + depth_num -=1 + code = code[:-num_features] + else: + mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') + value = code+(num_depth-depth_num)*(num_features*'0') + + table[idx] = [int(mask,2), int(value,2), cla] + idx += 1 + is_return == True + return table, idx, is_return + is_return == True + return table, idx, is_return + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['depth of quadtree'] = np.int(input('- Number of depth of the quadtree? (default = 2) ') or '2') + Planter_config['model config']['number of neighbours'] = np.int(input('- Number of neighbours of the knn? (default = 4) ') or '4') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + num_neighbours = Planter_config['model config']['number of neighbours'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['depth of quadtree'] + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t) + 1] + # print(feature_max) + + feature_min = [] + for i in feature_names: + t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] + feature_min += [np.min(t_t) ] + # print(feature_min) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # knn fit + knn_clf = KNeighborsClassifier(n_neighbors=num_neighbours, algorithm='kd_tree') + knn_clf.fit(train_X, train_y) + + sklearn_y_predict = knn_clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + print('Generating Ternary Tables for Clustream K-means ... ', end='') + lookup = {} + lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) + Ternary_Table = {} + Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, knn_clf, '', False) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4, cls=NpEncoder) + + + # ========================== prepare the test data ===================================== + for i in range(np.shape(test_X.values)[0]): + distance = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), + copy.deepcopy(feature_min)) + test_X.values[i][0] = int(code, 2) + + # ======================================================================================= + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "KNN" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['model config']['lookup'] = lookup + Planter_config['model config']['feature max'] = feature_max + Planter_config['model config']['feature min'] = feature_min + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['depth of quadtree'] + lookup = Planter_config['model config']['lookup'] + feature_max = Planter_config['model config']['feature max'] + feature_min = Planter_config['model config']['feature min'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + code = test_X.values[i][0] + + match_or_not = False + + + keys = list(Ternary_Table.keys()) + + for count in keys: + + if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: + switch_prediction = Ternary_Table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) + + diff --git a/src/models/KNN/Type_EB/dedicated_p4.py b/src/models/KNN/Type_EB/dedicated_p4.py index b33ab93..96a229b 100755 --- a/src/models/KNN/Type_EB/dedicated_p4.py +++ b/src/models/KNN/Type_EB/dedicated_p4.py @@ -1,179 +1,179 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['tbl_len'] = Planter_config['p4 config']["table length"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n" - # " bit<8> Place_holder;\n" - # "}\n\n") - headers.write(" bit<8> Place_holder;\n" ) - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action extract_label(bit<16> label){\n" - " hdr.Planter.result = (bit<32>) label;\n" - " }\n\n") - - - - - ingress.write(" table lookup_clustream {\n" - " key = { hdr.Planter.feature0:ternary; }\n" - " actions = {\n" - " extract_label();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['tbl_len']) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" lookup_clustream.apply();\n") - - - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for idx in Ternary_Table: - priority = int(idx) - key = Ternary_Table[idx][1] - mask = Ternary_Table[idx][0] - label = Ternary_Table[idx][2] - file.write("table_add SwitchIngress.lookup_clustream extract_label " + - str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - tload.write("print('load clustream table with',len(table.keys()),'entries')\n" - "for idx in table:\n") - tload.write(" key = table[idx][1]\n" - " mask = table[idx][0]\n" - " label = table[idx][2]\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" - " print(idx)\n") - - - - - - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['tbl_len'] = Planter_config['p4 config']["table length"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n" + # " bit<8> Place_holder;\n" + # "}\n\n") + headers.write(" bit<8> Place_holder;\n" ) + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action extract_label(bit<16> label){\n" + " hdr.Planter.result = (bit<32>) label;\n" + " }\n\n") + + + + + ingress.write(" table lookup_clustream {\n" + " key = { hdr.Planter.feature0:ternary; }\n" + " actions = {\n" + " extract_label();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['tbl_len']) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" lookup_clustream.apply();\n") + + + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for idx in Ternary_Table: + priority = int(idx) + key = Ternary_Table[idx][1] + mask = Ternary_Table[idx][0] + label = Ternary_Table[idx][2] + file.write("table_add SwitchIngress.lookup_clustream extract_label " + + str(key) + "&&&" + str(mask) + " => " + str(label) + " " + str(priority) + "\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + tload.write("print('load clustream table with',len(table.keys()),'entries')\n" + "for idx in table:\n") + tload.write(" key = table[idx][1]\n" + " mask = table[idx][0]\n" + " label = table[idx][2]\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_clustream.add_with_extract_label(key, mask, int(idx), label)\n" + " print(idx)\n") + + + + + + + + + diff --git a/src/models/KNN/Type_EB/readme.md b/src/models/KNN/Type_EB/readme.md index 95f7ba1..ed86506 100644 --- a/src/models/KNN/Type_EB/readme.md +++ b/src/models/KNN/Type_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KNN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for KNN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/KNN/Type_EB/table_generator.py b/src/models/KNN/Type_EB/table_generator.py index 91e5c54..413376a 100755 --- a/src/models/KNN/Type_EB/table_generator.py +++ b/src/models/KNN/Type_EB/table_generator.py @@ -1,344 +1,344 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng, Mingyuan Zang -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -import math - -import numpy as np -import pandas as pd -import argparse -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.metrics import * -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -import os -import sys -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.normalization import * -from src.functions.json_encoder import * -from src.functions.numeric_conversion import * - - - - - - -def relative_code_lookup(idx, num_features, feature_num, look_up, label): - if feature_num ==num_features: - code = '' - for f in range(num_features): - code += str(int(idx[f])) - look_up[code] = label - label += 1 - return look_up, label - else: - for r in [0,1]: - idx[feature_num] = r - feature_num+=1 - look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) - feature_num-=1 - return look_up, label - - -def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): - code = '' - need_split = True - while need_split: - if num_depth == depth_num: - break - center = np.zeros(num_features) - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - con = '' - for f in range(num_features): - if x[f]>= center[f]: - con += '1' - border_min[f] = copy.deepcopy(center[f]) - else: - con += '0' - border_max[f] = copy.deepcopy(center[f]) - code += ten_to_bin(lookup[con], num_features) - depth_num += 1 - return code - - - -def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): - if num_features == feature_num: - border_dict[idx] = copy.deepcopy(value_list) - idx+=1 - return border_dict, idx - else: - for i in [0,1]: - if i==0: - value_list[feature_num] = border_min[feature_num] - else: - value_list[feature_num] = border_max[feature_num] - feature_num += 1 - border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) - feature_num -= 1 - return border_dict, idx - - -def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, knn_clf): - not_finish = False - border_max_test = np.zeros(num_features) - border_min_test = np.zeros(num_features) - for f in range(num_features): - - border_min_test[f] = center[f] - width[f] - border_max_test[f] = center[f] + width[f] - - border_list = {} - border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) - # print(border_list) - x_border = [] - for idx in border_list: - x_border += [list(border_list[idx])] - - y_border = knn_clf.predict(x_border) - if len(np.unique(y_border)) !=1: - not_finish = True - cla = 404 - else: - cla = np.unique(y_border)[0] - return not_finish, cla - - - - -def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, knn_clf, division, is_return): - cla = 0 - # if not is_return: - is_return == False - for f in range(num_features): - center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 - width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 - - # print('0. center', center, 'width', width,'depth',depth_num,'division', division) - not_finish = True - if depth_num ==0: - not_finish = True - elif depth_num < num_depth: - not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, knn_clf) - else: - not_finish = False - # cla = 404 - _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, - num_depth, num_classes, knn_clf) - # print('1. ', not_finish, cla) - - if not_finish: - # print('go depper:', depth_num) - for division in lookup: - new_boarder_max = np.zeros(num_features) - new_boarder_mim = np.zeros(num_features) - for f in range(num_features): - if division[f] == '0': - new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) - new_boarder_max[f] = copy.deepcopy(center[f]) - else: - new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) - new_boarder_mim[f] = copy.deepcopy(center[f]) - # print('3. max', new_boarder_max, 'min', new_boarder_mim) - depth_num += 1 - code += ten_to_bin(lookup[division], num_features) - table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, - num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), - copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), knn_clf, division, is_return) - depth_num -=1 - code = code[:-num_features] - else: - mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') - value = code+(num_depth-depth_num)*(num_features*'0') - - table[idx] = [int(mask,2), int(value,2), cla] - idx += 1 - is_return == True - return table, idx, is_return - is_return == True - return table, idx, is_return - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['depth of quadtree'] = np.int(input('- Number of depth of the quadtree? (default = 2) ') or '2') - Planter_config['model config']['number of neighbours'] = np.int(input('- Number of neighbours of the knn? (default = 4) ') or '4') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - num_neighbours = Planter_config['model config']['number of neighbours'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['depth of quadtree'] - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t) + 1] - # print(feature_max) - - feature_min = [] - for i in feature_names: - t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] - feature_min += [np.min(t_t) ] - # print(feature_min) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # knn fit - knn_clf = KNeighborsClassifier(n_neighbors=num_neighbours, algorithm='kd_tree') - knn_clf.fit(train_X, train_y) - - sklearn_y_predict = knn_clf.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - print('Generating Ternary Tables for Clustream K-means ... ', end='') - lookup = {} - lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) - Ternary_Table = {} - Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, knn_clf, '', False) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4, cls=NpEncoder) - - - # ========================== prepare the test data ===================================== - for i in range(np.shape(test_X.values)[0]): - distance = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), - copy.deepcopy(feature_min)) - test_X.values[i][0] = int(code, 2) - - # ======================================================================================= - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "KNN" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['model config']['lookup'] = lookup - Planter_config['model config']['feature max'] = feature_max - Planter_config['model config']['feature min'] = feature_min - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['depth of quadtree'] - lookup = Planter_config['model config']['lookup'] - feature_max = Planter_config['model config']['feature max'] - feature_min = Planter_config['model config']['feature min'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - for i in range(np.shape(test_X.values)[0]): - code = test_X.values[i][0] - - match_or_not = False - - - keys = list(Ternary_Table.keys()) - - for count in keys: - - # if input_feature_value[f] & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: - if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: - switch_prediction = Ternary_Table[count][2] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - - switch_test_y += [switch_prediction] - - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng, Mingyuan Zang +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +import math + +import numpy as np +import pandas as pd +import argparse +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.metrics import * +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +import os +import sys +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.normalization import * +from src.functions.json_encoder import * +from src.functions.numeric_conversion import * + + + + + + +def relative_code_lookup(idx, num_features, feature_num, look_up, label): + if feature_num ==num_features: + code = '' + for f in range(num_features): + code += str(int(idx[f])) + look_up[code] = label + label += 1 + return look_up, label + else: + for r in [0,1]: + idx[feature_num] = r + feature_num+=1 + look_up,label = relative_code_lookup(idx,num_features,feature_num, look_up, label) + feature_num-=1 + return look_up, label + + +def get_codes(lookup, x, num_features, num_depth, depth_num, border_max, border_min): + code = '' + need_split = True + while need_split: + if num_depth == depth_num: + break + center = np.zeros(num_features) + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + con = '' + for f in range(num_features): + if x[f]>= center[f]: + con += '1' + border_min[f] = copy.deepcopy(center[f]) + else: + con += '0' + border_max[f] = copy.deepcopy(center[f]) + code += ten_to_bin(lookup[con], num_features) + depth_num += 1 + return code + + + +def get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx): + if num_features == feature_num: + border_dict[idx] = copy.deepcopy(value_list) + idx+=1 + return border_dict, idx + else: + for i in [0,1]: + if i==0: + value_list[feature_num] = border_min[feature_num] + else: + value_list[feature_num] = border_max[feature_num] + feature_num += 1 + border_dict, idx = get_boarder_list(border_dict, num_features, feature_num, num_depth, border_max, border_min, value_list, idx) + feature_num -= 1 + return border_dict, idx + + +def check_if_not_finish(center,width, division, num_features,num_depth, num_classes, knn_clf): + not_finish = False + border_max_test = np.zeros(num_features) + border_min_test = np.zeros(num_features) + for f in range(num_features): + + border_min_test[f] = center[f] - width[f] + border_max_test[f] = center[f] + width[f] + + border_list = {} + border_list, _ = get_boarder_list(border_list, num_features, 0, num_depth, border_max_test, border_min_test, np.zeros(num_features), 0) + # print(border_list) + x_border = [] + for idx in border_list: + x_border += [list(border_list[idx])] + + y_border = knn_clf.predict(x_border) + if len(np.unique(y_border)) !=1: + not_finish = True + cla = 404 + else: + cla = np.unique(y_border)[0] + return not_finish, cla + + + + +def clustream(table, idx, code, lookup, num_features, num_classes, num_depth, depth_num, center, width, border_max, border_min, knn_clf, division, is_return): + cla = 0 + # if not is_return: + is_return == False + for f in range(num_features): + center[f] = (copy.deepcopy(border_max[f]) + copy.deepcopy(border_min[f])) / 2 + width[f] = (copy.deepcopy(border_max[f]) - copy.deepcopy(border_min[f])) / 2 + + # print('0. center', center, 'width', width,'depth',depth_num,'division', division) + not_finish = True + if depth_num ==0: + not_finish = True + elif depth_num < num_depth: + not_finish, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, knn_clf) + else: + not_finish = False + # cla = 404 + _, cla = check_if_not_finish(copy.deepcopy(center), copy.deepcopy(width), division, num_features, + num_depth, num_classes, knn_clf) + # print('1. ', not_finish, cla) + + if not_finish: + # print('go depper:', depth_num) + for division in lookup: + new_boarder_max = np.zeros(num_features) + new_boarder_mim = np.zeros(num_features) + for f in range(num_features): + if division[f] == '0': + new_boarder_mim[f] = copy.deepcopy(center[f]) - copy.deepcopy(width[f]) + new_boarder_max[f] = copy.deepcopy(center[f]) + else: + new_boarder_max[f] = copy.deepcopy(center[f]) + copy.deepcopy(width[f]) + new_boarder_mim[f] = copy.deepcopy(center[f]) + # print('3. max', new_boarder_max, 'min', new_boarder_mim) + depth_num += 1 + code += ten_to_bin(lookup[division], num_features) + table, idx, is_return = clustream(table, idx, copy.deepcopy(code), lookup, num_features, num_classes, + num_depth, copy.deepcopy(depth_num), copy.deepcopy(center), copy.deepcopy(width), + copy.deepcopy(new_boarder_max), copy.deepcopy(new_boarder_mim), knn_clf, division, is_return) + depth_num -=1 + code = code[:-num_features] + else: + mask = (depth_num)*(num_features*'1')+(num_depth-depth_num)*(num_features*'0') + value = code+(num_depth-depth_num)*(num_features*'0') + + table[idx] = [int(mask,2), int(value,2), cla] + idx += 1 + is_return == True + return table, idx, is_return + is_return == True + return table, idx, is_return + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['depth of quadtree'] = np.int(input('- Number of depth of the quadtree? (default = 2) ') or '2') + Planter_config['model config']['number of neighbours'] = np.int(input('- Number of neighbours of the knn? (default = 4) ') or '4') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + num_neighbours = Planter_config['model config']['number of neighbours'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['depth of quadtree'] + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t) + 1] + # print(feature_max) + + feature_min = [] + for i in feature_names: + t_t = [test_X[[i]].min()[0], train_X[[i]].min()[0]] + feature_min += [np.min(t_t) ] + # print(feature_min) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # knn fit + knn_clf = KNeighborsClassifier(n_neighbors=num_neighbours, algorithm='kd_tree') + knn_clf.fit(train_X, train_y) + + sklearn_y_predict = knn_clf.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + print('Generating Ternary Tables for Clustream K-means ... ', end='') + lookup = {} + lookup, _ = relative_code_lookup(np.zeros(num_features), num_features, 0, lookup, 0) + Ternary_Table = {} + Ternary_Table, _, _ = clustream(Ternary_Table, 0, '', lookup, num_features, num_classes, num_depth, 0, np.zeros(num_features), np.zeros(num_features), feature_max, feature_min, knn_clf, '', False) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4, cls=NpEncoder) + + + # ========================== prepare the test data ===================================== + for i in range(np.shape(test_X.values)[0]): + distance = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + code = get_codes(lookup, input_feature_value, num_features, num_depth, 0, copy.deepcopy(feature_max), + copy.deepcopy(feature_min)) + test_X.values[i][0] = int(code, 2) + + # ======================================================================================= + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "KNN" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["table length"] = len(Ternary_Table.keys()) + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['model config']['lookup'] = lookup + Planter_config['model config']['feature max'] = feature_max + Planter_config['model config']['feature min'] = feature_min + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['depth of quadtree'] + lookup = Planter_config['model config']['lookup'] + feature_max = Planter_config['model config']['feature max'] + feature_min = Planter_config['model config']['feature min'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + for i in range(np.shape(test_X.values)[0]): + code = test_X.values[i][0] + + match_or_not = False + + + keys = list(Ternary_Table.keys()) + + for count in keys: + + # if input_feature_value[f] & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: + if code & Ternary_Table[count][0] == Ternary_Table[count][0] & Ternary_Table[count][1]: + switch_prediction = Ternary_Table[count][2] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + + switch_test_y += [switch_prediction] + + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + print('\nThe accuracy of the match action format of Kmeans is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Ternary match entries: ',np.sum(Planter_config['p4 config']["table length"]) ) + + diff --git a/src/models/KNN/readme.md b/src/models/KNN/readme.md index 85d8572..58b1055 100644 --- a/src/models/KNN/readme.md +++ b/src/models/KNN/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported KNN. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported KNN. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/NN/Type_1/BinaryNet/README.md b/src/models/NN/Type_1/BinaryNet/README.md index 2697020..d1c6639 100755 --- a/src/models/NN/Type_1/BinaryNet/README.md +++ b/src/models/NN/Type_1/BinaryNet/README.md @@ -1,52 +1,52 @@ -# Binary Neural Networks on PyTorch - -![Binarization](https://github.com/lucamocerino/Binary-Neural-Networks-PyTorch-1.0/blob/master/bin.png) - - -This repository implements three popular papers that introduced the concept of Binary Neural Networks: -- **XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks**: https://arxiv.org/abs/1603.05279. -- **Binarized Neural Networks** :https://papers.nips.cc/paper/6573-binarized-neural-networks -- **DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients** :https://arxiv.org/abs/1606.06160 - - - -The project is organized as follows: - - - **models** folder contains CNN models (simple mlp, Network-in-Network, LeNet5, etc.) - - **classifiers/{type}_classifier.py** contains the test and train procedures; where type = {bnn, xnor, dorefa} - - **models/{type}_layers.py** contains the binarylayers implementation (binary activation, binary conv and fully-connected layers, gradient update); where type = {bnn, xnor, dorefa} - - **yml** folder contains configuration files with hyperparameters - - **main.py** represents the entry file - -### Installation - -All packages are in *requirement.txt* -Install the dependencies: - -```sh -pip install -r requirements.txt -``` -### Basic usage -```sh -$ python main.py app:{yml_file} -``` -### Example -Network-in-Network on CIFAR10 dataset. All hyper parameters are in .yml file. -```sh -$ python main.py app:yml/nin_cifar10.yml -``` -## Related Applications -If you find this code useful in your research, please consider citing one of the works in this section. - - - Fast and Accurate Inference on Microcontrollers With Boosted Cooperative Convolutional Neural Networks (BC-Net) https://ieeexplore.ieee.org/abstract/document/9275360 - - CoopNet: Cooperative Convolutional Neural Network for Low-Power MCUs https://ieeexplore.ieee.org/abstract/document/8964993 - - TentacleNet: A Pseudo-Ensemble Template for Accurate Binary Convolutional Neural Networks https://ieeexplore.ieee.org/abstract/document/9073982/ - -License ----- - -MIT - - - - +# Binary Neural Networks on PyTorch + +![Binarization](https://github.com/lucamocerino/Binary-Neural-Networks-PyTorch-1.0/blob/master/bin.png) + + +This repository implements three popular papers that introduced the concept of Binary Neural Networks: +- **XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks**: https://arxiv.org/abs/1603.05279. +- **Binarized Neural Networks** :https://papers.nips.cc/paper/6573-binarized-neural-networks +- **DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients** :https://arxiv.org/abs/1606.06160 + + + +The project is organized as follows: + + - **models** folder contains CNN models (simple mlp, Network-in-Network, LeNet5, etc.) + - **classifiers/{type}_classifier.py** contains the test and train procedures; where type = {bnn, xnor, dorefa} + - **models/{type}_layers.py** contains the binarylayers implementation (binary activation, binary conv and fully-connected layers, gradient update); where type = {bnn, xnor, dorefa} + - **yml** folder contains configuration files with hyperparameters + - **main.py** represents the entry file + +### Installation + +All packages are in *requirement.txt* +Install the dependencies: + +```sh +pip install -r requirements.txt +``` +### Basic usage +```sh +$ python main.py app:{yml_file} +``` +### Example +Network-in-Network on CIFAR10 dataset. All hyper parameters are in .yml file. +```sh +$ python main.py app:yml/nin_cifar10.yml +``` +## Related Applications +If you find this code useful in your research, please consider citing one of the works in this section. + + - Fast and Accurate Inference on Microcontrollers With Boosted Cooperative Convolutional Neural Networks (BC-Net) https://ieeexplore.ieee.org/abstract/document/9275360 + - CoopNet: Cooperative Convolutional Neural Network for Low-Power MCUs https://ieeexplore.ieee.org/abstract/document/8964993 + - TentacleNet: A Pseudo-Ensemble Template for Accurate Binary Convolutional Neural Networks https://ieeexplore.ieee.org/abstract/document/9073982/ + +License +---- + +MIT + + + + diff --git a/src/models/NN/Type_1/BinaryNet/classifiers/bnn_classifier.py b/src/models/NN/Type_1/BinaryNet/classifiers/bnn_classifier.py index 22f271f..6087dd5 100755 --- a/src/models/NN/Type_1/BinaryNet/classifiers/bnn_classifier.py +++ b/src/models/NN/Type_1/BinaryNet/classifiers/bnn_classifier.py @@ -1,112 +1,112 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -import shutil - - -class BnnClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def top1_accuracy(self): - return top1_accuracy(self.model, self.test_loader, self.device) - - - def train_step(self, criterion, optimizer): - losses = [] - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - optimizer.zero_grad() - loss.backward() - for p in self.model.modules(): - if hasattr(p, 'weight_org'): - p.weight.data.copy_(p.weight_org) - optimizer.step() - for p in self.model.modules(): - if hasattr(p, 'weight_org'): - p.weight_org.data.copy_(p.weight.data.clamp_(-1,1)) - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +import shutil + + +class BnnClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def top1_accuracy(self): + return top1_accuracy(self.model, self.test_loader, self.device) + + + def train_step(self, criterion, optimizer): + losses = [] + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + optimizer.zero_grad() + loss.backward() + for p in self.model.modules(): + if hasattr(p, 'weight_org'): + p.weight.data.copy_(p.weight_org) + optimizer.step() + for p in self.model.modules(): + if hasattr(p, 'weight_org'): + p.weight_org.data.copy_(p.weight.data.clamp_(-1,1)) + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_1/BinaryNet/classifiers/dorefa_classifier.py b/src/models/NN/Type_1/BinaryNet/classifiers/dorefa_classifier.py index 2bb54b9..2f9a667 100755 --- a/src/models/NN/Type_1/BinaryNet/classifiers/dorefa_classifier.py +++ b/src/models/NN/Type_1/BinaryNet/classifiers/dorefa_classifier.py @@ -1,109 +1,109 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -import shutil - -class DorefaClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def train_step(self, criterion, optimizer): - losses = [] - self.model.train() - - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - - - data, target = data.to(self.device), target.to(self.device) - optimizer.zero_grad() - - - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - loss.backward() - - optimizer.step() - - - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +import shutil + +class DorefaClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def train_step(self, criterion, optimizer): + losses = [] + self.model.train() + + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + + + data, target = data.to(self.device), target.to(self.device) + optimizer.zero_grad() + + + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + loss.backward() + + optimizer.step() + + + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_1/BinaryNet/classifiers/xnor_classifier.py b/src/models/NN/Type_1/BinaryNet/classifiers/xnor_classifier.py index 9e133f2..de212d1 100755 --- a/src/models/NN/Type_1/BinaryNet/classifiers/xnor_classifier.py +++ b/src/models/NN/Type_1/BinaryNet/classifiers/xnor_classifier.py @@ -1,128 +1,128 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -from src.models.NN.Type_1.BinaryNet.models.xnor_layers import XNORConv2d -import shutil -from sklearn.metrics import * - -class XnorClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - first = True - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - if first: - - nn_pred = pred[:,0].numpy() - label = target.numpy() - # print(nn_pred, output) - first = False - else: - # print(nn_pred, output) - nn_pred = np.hstack((nn_pred, pred[:,0].numpy())) - label = np.hstack((label, target.numpy())) - - result = classification_report(nn_pred, label, digits=4) - print('\n', result) - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def train_step(self, criterion, optimizer): - losses = [] - self.model.train() - - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - - - data, target = data.to(self.device), target.to(self.device) - optimizer.zero_grad() - - - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - loss.backward() - - for m in self.model.modules(): - if isinstance(m, XNORConv2d): - m.update_gradient() - - optimizer.step() - - - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +from src.models.NN.Type_1.BinaryNet.models.xnor_layers import XNORConv2d +import shutil +from sklearn.metrics import * + +class XnorClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + first = True + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + if first: + + nn_pred = pred[:,0].numpy() + label = target.numpy() + # print(nn_pred, output) + first = False + else: + # print(nn_pred, output) + nn_pred = np.hstack((nn_pred, pred[:,0].numpy())) + label = np.hstack((label, target.numpy())) + + result = classification_report(nn_pred, label, digits=4) + print('\n', result) + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def train_step(self, criterion, optimizer): + losses = [] + self.model.train() + + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + + + data, target = data.to(self.device), target.to(self.device) + optimizer.zero_grad() + + + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + loss.backward() + + for m in self.model.modules(): + if isinstance(m, XNORConv2d): + m.update_gradient() + + optimizer.step() + + + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_1/BinaryNet/config.py b/src/models/NN/Type_1/BinaryNet/config.py index 51c5c8e..dd377fe 100755 --- a/src/models/NN/Type_1/BinaryNet/config.py +++ b/src/models/NN/Type_1/BinaryNet/config.py @@ -1,169 +1,169 @@ -"""config utilities for yml file.""" -import os -import sys -import yaml - -# singletone -FLAGS = None - - -class LoaderMeta(type): - """Constructor for supporting `!include`. - """ - def __new__(mcs, __name__, __bases__, __dict__): - """Add include constructer to class.""" - # register the include constructor on the class - cls = super().__new__(mcs, __name__, __bases__, __dict__) - cls.add_constructor('!include', cls.construct_include) - return cls - - -class Loader(yaml.Loader, metaclass=LoaderMeta): - """YAML Loader with `!include` constructor. - """ - def __init__(self, stream): - try: - self._root = os.path.split(stream.name)[0] - except AttributeError: - self._root = os.path.curdir - super().__init__(stream) - - def construct_include(self, node): - """Include file referenced at node.""" - filename = os.path.abspath( - os.path.join(self._root, self.construct_scalar(node))) - extension = os.path.splitext(filename)[1].lstrip('.') - with open(filename, 'r') as f: - if extension in ('yaml', 'yml'): - return yaml.load(f, Loader) - else: - return ''.join(f.readlines()) - - -class AttrDict(dict): - """Dict as attribute trick. - - """ - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, dict): - self.__dict__[key] = AttrDict(value) - elif isinstance(value, list): - if isinstance(value[0], dict): - self.__dict__[key] = [AttrDict(item) for item in value] - else: - self.__dict__[key] = value - - def yaml(self): - """Convert object to yaml dict and return. - - """ - yaml_dict = {} - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, AttrDict): - yaml_dict[key] = value.yaml() - elif isinstance(value, list): - if isinstance(value[0], AttrDict): - new_l = [] - for item in value: - new_l.append(item.yaml()) - yaml_dict[key] = new_l - else: - yaml_dict[key] = value - else: - yaml_dict[key] = value - return yaml_dict - - def __repr__(self): - """Print all variables. - - """ - ret_str = [] - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, AttrDict): - ret_str.append('{}:'.format(key)) - child_ret_str = value.__repr__().split('\n') - for item in child_ret_str: - ret_str.append(' ' + item) - elif isinstance(value, list): - if isinstance(value[0], AttrDict): - ret_str.append('{}:'.format(key)) - for item in value: - # treat as AttrDict above - child_ret_str = item.__repr__().split('\n') - for item in child_ret_str: - ret_str.append(' ' + item) - else: - ret_str.append('{}: {}'.format(key, value)) - else: - ret_str.append('{}: {}'.format(key, value)) - return '\n'.join(ret_str) - - -class Config(AttrDict): - """Config with yaml file. - - This class is used to config model hyper-parameters, global constants, and - other settings with yaml file. All settings in yaml file will be - automatically logged into file. - - Args: - filename(str): File name. - - Examples: - - yaml file ``model.yml``:: - - NAME: 'neuralgym' - ALPHA: 1.0 - DATASET: '/mnt/data/imagenet' - - Usage in .py: - - >>> from neuralgym import Config - >>> config = Config('model.yml') - >>> print(config.NAME) - neuralgym - >>> print(config.ALPHA) - 1.0 - >>> print(config.DATASET) - /mnt/data/imagenet - - """ - - def __init__(self, filename=None, verbose=False): - assert os.path.exists(filename), 'File {} not exist.'.format(filename) - try: - with open(filename, 'r') as f: - cfg_dict = yaml.load(f, Loader) - except EnvironmentError: - print('Please check the file with name of "%s"', filename) - super(Config, self).__init__(cfg_dict) - if verbose: - print(' pi.cfg '.center(80, '-')) - print(self.__repr__()) - print(''.center(80, '-')) - - -def app(): - """Load app via stdin from subprocess""" - global FLAGS - if FLAGS is None: - job_yaml_file = None - for arg in sys.argv: - if arg.startswith('app:'): - job_yaml_file = arg[4:] - if job_yaml_file is None: - job_yaml_file = sys.stdin.readline() - FLAGS = Config(job_yaml_file) - return FLAGS - else: - return FLAGS - - -app() +"""config utilities for yml file.""" +import os +import sys +import yaml + +# singletone +FLAGS = None + + +class LoaderMeta(type): + """Constructor for supporting `!include`. + """ + def __new__(mcs, __name__, __bases__, __dict__): + """Add include constructer to class.""" + # register the include constructor on the class + cls = super().__new__(mcs, __name__, __bases__, __dict__) + cls.add_constructor('!include', cls.construct_include) + return cls + + +class Loader(yaml.Loader, metaclass=LoaderMeta): + """YAML Loader with `!include` constructor. + """ + def __init__(self, stream): + try: + self._root = os.path.split(stream.name)[0] + except AttributeError: + self._root = os.path.curdir + super().__init__(stream) + + def construct_include(self, node): + """Include file referenced at node.""" + filename = os.path.abspath( + os.path.join(self._root, self.construct_scalar(node))) + extension = os.path.splitext(filename)[1].lstrip('.') + with open(filename, 'r') as f: + if extension in ('yaml', 'yml'): + return yaml.load(f, Loader) + else: + return ''.join(f.readlines()) + + +class AttrDict(dict): + """Dict as attribute trick. + + """ + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, dict): + self.__dict__[key] = AttrDict(value) + elif isinstance(value, list): + if isinstance(value[0], dict): + self.__dict__[key] = [AttrDict(item) for item in value] + else: + self.__dict__[key] = value + + def yaml(self): + """Convert object to yaml dict and return. + + """ + yaml_dict = {} + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, AttrDict): + yaml_dict[key] = value.yaml() + elif isinstance(value, list): + if isinstance(value[0], AttrDict): + new_l = [] + for item in value: + new_l.append(item.yaml()) + yaml_dict[key] = new_l + else: + yaml_dict[key] = value + else: + yaml_dict[key] = value + return yaml_dict + + def __repr__(self): + """Print all variables. + + """ + ret_str = [] + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, AttrDict): + ret_str.append('{}:'.format(key)) + child_ret_str = value.__repr__().split('\n') + for item in child_ret_str: + ret_str.append(' ' + item) + elif isinstance(value, list): + if isinstance(value[0], AttrDict): + ret_str.append('{}:'.format(key)) + for item in value: + # treat as AttrDict above + child_ret_str = item.__repr__().split('\n') + for item in child_ret_str: + ret_str.append(' ' + item) + else: + ret_str.append('{}: {}'.format(key, value)) + else: + ret_str.append('{}: {}'.format(key, value)) + return '\n'.join(ret_str) + + +class Config(AttrDict): + """Config with yaml file. + + This class is used to config model hyper-parameters, global constants, and + other settings with yaml file. All settings in yaml file will be + automatically logged into file. + + Args: + filename(str): File name. + + Examples: + + yaml file ``model.yml``:: + + NAME: 'neuralgym' + ALPHA: 1.0 + DATASET: '/mnt/data/imagenet' + + Usage in .py: + + >>> from neuralgym import Config + >>> config = Config('model.yml') + >>> print(config.NAME) + neuralgym + >>> print(config.ALPHA) + 1.0 + >>> print(config.DATASET) + /mnt/data/imagenet + + """ + + def __init__(self, filename=None, verbose=False): + assert os.path.exists(filename), 'File {} not exist.'.format(filename) + try: + with open(filename, 'r') as f: + cfg_dict = yaml.load(f, Loader) + except EnvironmentError: + print('Please check the file with name of "%s"', filename) + super(Config, self).__init__(cfg_dict) + if verbose: + print(' pi.cfg '.center(80, '-')) + print(self.__repr__()) + print(''.center(80, '-')) + + +def app(): + """Load app via stdin from subprocess""" + global FLAGS + if FLAGS is None: + job_yaml_file = None + for arg in sys.argv: + if arg.startswith('app:'): + job_yaml_file = arg[4:] + if job_yaml_file is None: + job_yaml_file = sys.stdin.readline() + FLAGS = Config(job_yaml_file) + return FLAGS + else: + return FLAGS + + +app() diff --git a/src/models/NN/Type_1/BinaryNet/dataloader/__init__.py b/src/models/NN/Type_1/BinaryNet/dataloader/__init__.py index 788a17b..d210291 100755 --- a/src/models/NN/Type_1/BinaryNet/dataloader/__init__.py +++ b/src/models/NN/Type_1/BinaryNet/dataloader/__init__.py @@ -1,2 +1,2 @@ -from .cifar10 import * -from .mnist import * +from .cifar10 import * +from .mnist import * diff --git a/src/models/NN/Type_1/BinaryNet/dataloader/cifar10.py b/src/models/NN/Type_1/BinaryNet/dataloader/cifar10.py index ec1333a..9c18a8e 100755 --- a/src/models/NN/Type_1/BinaryNet/dataloader/cifar10.py +++ b/src/models/NN/Type_1/BinaryNet/dataloader/cifar10.py @@ -1,39 +1,39 @@ -import os -import torch -from torchvision.datasets import CIFAR10 -import torchvision.transforms as tvt - - -def load_train_data(batch_size=64, sampler=None): - transform = tvt.Compose([ - tvt.RandomCrop(32, padding=4), - tvt.RandomHorizontalFlip(), - tvt.ToTensor(), - tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - - if sampler is None: - shuffle = True - else: - shuffle = False - - dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=True, - download=True, transform=transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, - shuffle=shuffle, sampler=sampler, num_workers=4, pin_memory=True) - - return loader - - -def load_test_data(batch_size=1000, sampler=None): - transform = tvt.Compose([ - tvt.ToTensor(), - tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - - dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=False, - download=True, transform=transform) - loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, - shuffle=False, sampler=sampler, num_workers=4, pin_memory=True) - - return loader +import os +import torch +from torchvision.datasets import CIFAR10 +import torchvision.transforms as tvt + + +def load_train_data(batch_size=64, sampler=None): + transform = tvt.Compose([ + tvt.RandomCrop(32, padding=4), + tvt.RandomHorizontalFlip(), + tvt.ToTensor(), + tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + + if sampler is None: + shuffle = True + else: + shuffle = False + + dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=True, + download=True, transform=transform) + loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, + shuffle=shuffle, sampler=sampler, num_workers=4, pin_memory=True) + + return loader + + +def load_test_data(batch_size=1000, sampler=None): + transform = tvt.Compose([ + tvt.ToTensor(), + tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + + dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=False, + download=True, transform=transform) + loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, + shuffle=False, sampler=sampler, num_workers=4, pin_memory=True) + + return loader diff --git a/src/models/NN/Type_1/BinaryNet/dataloader/mnist.py b/src/models/NN/Type_1/BinaryNet/dataloader/mnist.py index 7dcf662..86f69ad 100755 --- a/src/models/NN/Type_1/BinaryNet/dataloader/mnist.py +++ b/src/models/NN/Type_1/BinaryNet/dataloader/mnist.py @@ -1,38 +1,38 @@ -from torch.utils.data import DataLoader -from os.path import join -from torchvision.datasets import MNIST -from torchvision.transforms import Compose, Resize, Normalize, ToTensor - - -def load_train_data(batch_size=128, sampler=None): - cuda = True - loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} - - train_loader = DataLoader( - MNIST(join('datasets', 'mnist'), train=True, download=True, - transform=Compose([ - Resize((28, 28)), - ToTensor(), - Normalize((0.1307,),(0.308,)), - ])), - batch_size=batch_size, shuffle=True, **loader_kwargs) - - return train_loader - -def load_test_data(batch_size=1000, sampler=None): - - cuda = True - loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} - - test_loader = DataLoader( - MNIST(join('datasets', 'mnist'), train=False, download=True, - transform=Compose([ - Resize((28, 28)), - ToTensor(), - Normalize((0.1307,),(0.308,)), - ])), - batch_size= batch_size, shuffle=False,sampler=sampler, **loader_kwargs) - - return test_loader - - +from torch.utils.data import DataLoader +from os.path import join +from torchvision.datasets import MNIST +from torchvision.transforms import Compose, Resize, Normalize, ToTensor + + +def load_train_data(batch_size=128, sampler=None): + cuda = True + loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} + + train_loader = DataLoader( + MNIST(join('datasets', 'mnist'), train=True, download=True, + transform=Compose([ + Resize((28, 28)), + ToTensor(), + Normalize((0.1307,),(0.308,)), + ])), + batch_size=batch_size, shuffle=True, **loader_kwargs) + + return train_loader + +def load_test_data(batch_size=1000, sampler=None): + + cuda = True + loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} + + test_loader = DataLoader( + MNIST(join('datasets', 'mnist'), train=False, download=True, + transform=Compose([ + Resize((28, 28)), + ToTensor(), + Normalize((0.1307,),(0.308,)), + ])), + batch_size= batch_size, shuffle=False,sampler=sampler, **loader_kwargs) + + return test_loader + + diff --git a/src/models/NN/Type_1/BinaryNet/main.py b/src/models/NN/Type_1/BinaryNet/main.py index a174316..cf8da5c 100755 --- a/src/models/NN/Type_1/BinaryNet/main.py +++ b/src/models/NN/Type_1/BinaryNet/main.py @@ -1,49 +1,49 @@ -import torch -from classifiers.xnor_classifier import * -from classifiers.dorefa_classifier import * -from classifiers.bnn_classifier import * -from config import FLAGS -import importlib -from models import * - -cuda = torch.cuda.is_available() and not(FLAGS.no_cuda) -device = torch.device('cuda' if cuda else 'cpu') -torch.manual_seed(0) -if cuda: - torch.backends.cudnn.deterministic=True - torch.cuda.manual_seed(0) - -dataset = importlib.import_module("dataloader.{}".format(FLAGS.dataset)) -train_loader = dataset.load_train_data(FLAGS.batch_size) -test_loader = dataset.load_test_data(FLAGS.test_batch_size) - -model = eval(FLAGS.model)() -model.to(device) - - -if FLAGS.bin_type == 'xnor': - classification = XnorClassifier(model, train_loader, test_loader, device) - -elif FLAGS.bin_type == 'bnn': - classification = BnnClassifier(model, train_loader, test_loader, device) - -elif FLAGS.bin_type == 'dorefa': - classification = DorefaClassifier(model, train_loader, test_loader, device) - -criterion = torch.nn.CrossEntropyLoss() -criterion.to(device) - -if hasattr(model, 'init_w'): - model.init_w() - - -if FLAGS.optimizer == 'adam': - optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr, weight_decay=1e-5) -elif FLAGS.optimizer == 'sgd': - optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0.9, - weight_decay=5.e-4) - -scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, FLAGS.steps, - gamma=FLAGS.gamma) - -classification.train(criterion, optimizer, FLAGS.epochs, scheduler, FLAGS.checkpoint) +import torch +from classifiers.xnor_classifier import * +from classifiers.dorefa_classifier import * +from classifiers.bnn_classifier import * +from config import FLAGS +import importlib +from models import * + +cuda = torch.cuda.is_available() and not(FLAGS.no_cuda) +device = torch.device('cuda' if cuda else 'cpu') +torch.manual_seed(0) +if cuda: + torch.backends.cudnn.deterministic=True + torch.cuda.manual_seed(0) + +dataset = importlib.import_module("dataloader.{}".format(FLAGS.dataset)) +train_loader = dataset.load_train_data(FLAGS.batch_size) +test_loader = dataset.load_test_data(FLAGS.test_batch_size) + +model = eval(FLAGS.model)() +model.to(device) + + +if FLAGS.bin_type == 'xnor': + classification = XnorClassifier(model, train_loader, test_loader, device) + +elif FLAGS.bin_type == 'bnn': + classification = BnnClassifier(model, train_loader, test_loader, device) + +elif FLAGS.bin_type == 'dorefa': + classification = DorefaClassifier(model, train_loader, test_loader, device) + +criterion = torch.nn.CrossEntropyLoss() +criterion.to(device) + +if hasattr(model, 'init_w'): + model.init_w() + + +if FLAGS.optimizer == 'adam': + optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr, weight_decay=1e-5) +elif FLAGS.optimizer == 'sgd': + optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0.9, + weight_decay=5.e-4) + +scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, FLAGS.steps, + gamma=FLAGS.gamma) + +classification.train(criterion, optimizer, FLAGS.epochs, scheduler, FLAGS.checkpoint) diff --git a/src/models/NN/Type_1/BinaryNet/models/__init__.py b/src/models/NN/Type_1/BinaryNet/models/__init__.py index 20eb788..923977d 100755 --- a/src/models/NN/Type_1/BinaryNet/models/__init__.py +++ b/src/models/NN/Type_1/BinaryNet/models/__init__.py @@ -1,5 +1,5 @@ -from .xnor_nin import * -from .xnor_lenet import * -from .xnor_mlp import * -from .dorefa_resnet import * -from .bnn_caffenet import * +from .xnor_nin import * +from .xnor_lenet import * +from .xnor_mlp import * +from .dorefa_resnet import * +from .bnn_caffenet import * diff --git a/src/models/NN/Type_1/BinaryNet/models/bnn_caffenet.py b/src/models/NN/Type_1/BinaryNet/models/bnn_caffenet.py index c215779..341f43c 100755 --- a/src/models/NN/Type_1/BinaryNet/models/bnn_caffenet.py +++ b/src/models/NN/Type_1/BinaryNet/models/bnn_caffenet.py @@ -1,61 +1,61 @@ -import torch.nn as nn -from .bnn_layers import * - - -__all__ = ['bnn_caffenet'] - - - -class BNNCaffenet(nn.Module): - - def __init__(self, num_classes=10): - super(BNNCaffenet, self).__init__() - - self.features = nn.Sequential( - - BNNConv2d(3, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - nn.Flatten(), - nn.BatchNorm1d(512), - nn.Hardtanh(inplace=True), - BNNLinear(512, num_classes), - nn.BatchNorm1d(num_classes, affine=False), - nn.LogSoftmax(dim=1), - ) - - def forward(self, x): - return self.features(x) - - - def init_w(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - return - - -def bnn_caffenet(num_classes=10): - return BNNCaffenet(num_classes) - +import torch.nn as nn +from .bnn_layers import * + + +__all__ = ['bnn_caffenet'] + + + +class BNNCaffenet(nn.Module): + + def __init__(self, num_classes=10): + super(BNNCaffenet, self).__init__() + + self.features = nn.Sequential( + + BNNConv2d(3, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + nn.Flatten(), + nn.BatchNorm1d(512), + nn.Hardtanh(inplace=True), + BNNLinear(512, num_classes), + nn.BatchNorm1d(num_classes, affine=False), + nn.LogSoftmax(dim=1), + ) + + def forward(self, x): + return self.features(x) + + + def init_w(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + return + + +def bnn_caffenet(num_classes=10): + return BNNCaffenet(num_classes) + diff --git a/src/models/NN/Type_1/BinaryNet/models/bnn_layers.py b/src/models/NN/Type_1/BinaryNet/models/bnn_layers.py index 14a3879..760e124 100755 --- a/src/models/NN/Type_1/BinaryNet/models/bnn_layers.py +++ b/src/models/NN/Type_1/BinaryNet/models/bnn_layers.py @@ -1,62 +1,62 @@ -import torch -from torch.nn import Module, Conv2d, Linear -from torch.nn.functional import linear, conv2d - - -__all__ = ['BNNLinear', 'BNNConv2d'] - - - - -def Binarize(tensor,quant_mode='det'): - if quant_mode=='det': - return tensor.sign() - if quant_mode=='bin': - return (tensor>=0).type(type(tensor))*2-1 - else: - return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) - - -class BNNLinear(Linear): - - def __init__(self, *kargs, **kwargs): - super(BNNLinear, self).__init__(*kargs, **kwargs) - self.register_buffer('weight_org', self.weight.data.clone()) - - def forward(self, input): - - if (input.size(1) != 784) and (input.size(1) != 3072): - input.data=Binarize(input.data) - - self.weight.data=Binarize(self.weight_org) - out = linear(input, self.weight) - - if not self.bias is None: - self.bias.org=self.bias.data.clone() - out += self.bias.view(1, -1).expand_as(out) - - return out - - -class BNNConv2d(Conv2d): - - def __init__(self, *kargs, **kwargs): - super(BNNConv2d, self).__init__(*kargs, **kwargs) - self.register_buffer('weight_org', self.weight.data.clone()) - - def forward(self, input): - if input.size(1) != 3: - input.data = Binarize(input.data) - - self.weight.data=Binarize(self.weight_org) - - - out = conv2d(input, self.weight, None, self.stride, - self.padding, self.dilation, self.groups) - - if not self.bias is None: - self.bias.org=self.bias.data.clone() - out += self.bias.view(1, -1, 1, 1).expand_as(out) - - return out - +import torch +from torch.nn import Module, Conv2d, Linear +from torch.nn.functional import linear, conv2d + + +__all__ = ['BNNLinear', 'BNNConv2d'] + + + + +def Binarize(tensor,quant_mode='det'): + if quant_mode=='det': + return tensor.sign() + if quant_mode=='bin': + return (tensor>=0).type(type(tensor))*2-1 + else: + return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) + + +class BNNLinear(Linear): + + def __init__(self, *kargs, **kwargs): + super(BNNLinear, self).__init__(*kargs, **kwargs) + self.register_buffer('weight_org', self.weight.data.clone()) + + def forward(self, input): + + if (input.size(1) != 784) and (input.size(1) != 3072): + input.data=Binarize(input.data) + + self.weight.data=Binarize(self.weight_org) + out = linear(input, self.weight) + + if not self.bias is None: + self.bias.org=self.bias.data.clone() + out += self.bias.view(1, -1).expand_as(out) + + return out + + +class BNNConv2d(Conv2d): + + def __init__(self, *kargs, **kwargs): + super(BNNConv2d, self).__init__(*kargs, **kwargs) + self.register_buffer('weight_org', self.weight.data.clone()) + + def forward(self, input): + if input.size(1) != 3: + input.data = Binarize(input.data) + + self.weight.data=Binarize(self.weight_org) + + + out = conv2d(input, self.weight, None, self.stride, + self.padding, self.dilation, self.groups) + + if not self.bias is None: + self.bias.org=self.bias.data.clone() + out += self.bias.view(1, -1, 1, 1).expand_as(out) + + return out + diff --git a/src/models/NN/Type_1/BinaryNet/models/dorefa_layers.py b/src/models/NN/Type_1/BinaryNet/models/dorefa_layers.py index b1dad33..9388b5b 100755 --- a/src/models/NN/Type_1/BinaryNet/models/dorefa_layers.py +++ b/src/models/NN/Type_1/BinaryNet/models/dorefa_layers.py @@ -1,110 +1,110 @@ -import torch -import numpy as np -from torch.autograd import Function -from torch.nn import Conv2d, Linear -from torch.nn.functional import linear, conv2d - -__all__ = ['DOREFAConv2d','DOREFALinear'] - - -class ScaleSigner(Function): - """take a real value x, output sign(x)*E(|x|)""" - @staticmethod - def forward(ctx, input): - return torch.sign(input) * torch.mean(torch.abs(input)) - - @staticmethod - def backward(ctx, grad_output): - return grad_output - - -def scale_sign(input): - return ScaleSigner.apply(input) - - -class Quantizer(Function): - @staticmethod - def forward(ctx, input, nbit): - scale = 2 ** nbit - 1 - return torch.round(input * scale) / scale - - @staticmethod - def backward(ctx, grad_output): - return grad_output, None - - -def quantize(input, nbit): - return Quantizer.apply(input, nbit) - - -def dorefa_w(w, nbit_w): - if nbit_w == 1: - w = scale_sign(w) - else: - w = torch.tanh(w) - w = w / (2 * torch.max(torch.abs(w))) + 0.5 - w = 2 * quantize(w, nbit_w) - 1 - - return w - - -def dorefa_a(input, nbit_a): - return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) - - -class DOREFAConv2d(Conv2d): - """docstring for QuanConv""" - def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, - nbit_a=1, stride=1, - padding=0, dilation=1, groups=1, - bias=True): - super(DOREFAConv2d, self).__init__( - in_channels, out_channels, kernel_size, stride, padding, dilation, - groups, bias) - self.nbit_w = nbit_w - self.nbit_a = nbit_a - name_w_dict = {'dorefa': dorefa_w} - name_a_dict = {'dorefa': dorefa_a} - self.quan_w = name_w_dict[quan_name_w] - self.quan_a = name_a_dict[quan_name_a] - - def forward(self, input): - if self.nbit_w < 32: - w = self.quan_w(self.weight, self.nbit_w) - else: - w = self.weight - - if self.nbit_a < 32: - x = self.quan_a(input, self.nbit_a) - else: - x = input - - output = conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) - - return output - -class DOREFALinear(Linear): - def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, nbit_a=1): - super(DOREFALinear, self).__init__(in_features, out_features, bias) - self.nbit_w = nbit_w - self.nbit_a = nbit_a - name_w_dict = {'dorefa': dorefa_w} - name_a_dict = {'dorefa': dorefa_a} - self.quan_w = name_w_dict[quan_name_w] - self.quan_a = name_a_dict[quan_name_a] - - def forward(self, input): - if self.nbit_w < 32: - w = self.quan_w(self.weight, self.nbit_w) - else: - w = self.weight - - if self.nbit_a < 32: - x = self.quan_a(input, self.nbit_a) - else: - x = input - - - output = linear(x, w, self.bias) - - return output +import torch +import numpy as np +from torch.autograd import Function +from torch.nn import Conv2d, Linear +from torch.nn.functional import linear, conv2d + +__all__ = ['DOREFAConv2d','DOREFALinear'] + + +class ScaleSigner(Function): + """take a real value x, output sign(x)*E(|x|)""" + @staticmethod + def forward(ctx, input): + return torch.sign(input) * torch.mean(torch.abs(input)) + + @staticmethod + def backward(ctx, grad_output): + return grad_output + + +def scale_sign(input): + return ScaleSigner.apply(input) + + +class Quantizer(Function): + @staticmethod + def forward(ctx, input, nbit): + scale = 2 ** nbit - 1 + return torch.round(input * scale) / scale + + @staticmethod + def backward(ctx, grad_output): + return grad_output, None + + +def quantize(input, nbit): + return Quantizer.apply(input, nbit) + + +def dorefa_w(w, nbit_w): + if nbit_w == 1: + w = scale_sign(w) + else: + w = torch.tanh(w) + w = w / (2 * torch.max(torch.abs(w))) + 0.5 + w = 2 * quantize(w, nbit_w) - 1 + + return w + + +def dorefa_a(input, nbit_a): + return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) + + +class DOREFAConv2d(Conv2d): + """docstring for QuanConv""" + def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, + nbit_a=1, stride=1, + padding=0, dilation=1, groups=1, + bias=True): + super(DOREFAConv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias) + self.nbit_w = nbit_w + self.nbit_a = nbit_a + name_w_dict = {'dorefa': dorefa_w} + name_a_dict = {'dorefa': dorefa_a} + self.quan_w = name_w_dict[quan_name_w] + self.quan_a = name_a_dict[quan_name_a] + + def forward(self, input): + if self.nbit_w < 32: + w = self.quan_w(self.weight, self.nbit_w) + else: + w = self.weight + + if self.nbit_a < 32: + x = self.quan_a(input, self.nbit_a) + else: + x = input + + output = conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) + + return output + +class DOREFALinear(Linear): + def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, nbit_a=1): + super(DOREFALinear, self).__init__(in_features, out_features, bias) + self.nbit_w = nbit_w + self.nbit_a = nbit_a + name_w_dict = {'dorefa': dorefa_w} + name_a_dict = {'dorefa': dorefa_a} + self.quan_w = name_w_dict[quan_name_w] + self.quan_a = name_a_dict[quan_name_a] + + def forward(self, input): + if self.nbit_w < 32: + w = self.quan_w(self.weight, self.nbit_w) + else: + w = self.weight + + if self.nbit_a < 32: + x = self.quan_a(input, self.nbit_a) + else: + x = input + + + output = linear(x, w, self.bias) + + return output diff --git a/src/models/NN/Type_1/BinaryNet/models/dorefa_resnet.py b/src/models/NN/Type_1/BinaryNet/models/dorefa_resnet.py index 92ebbb9..3f426d6 100755 --- a/src/models/NN/Type_1/BinaryNet/models/dorefa_resnet.py +++ b/src/models/NN/Type_1/BinaryNet/models/dorefa_resnet.py @@ -1,154 +1,154 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .dorefa_layers import DOREFAConv2d as Conv -from .dorefa_layers import DOREFALinear as Linear - -__all__ = ['dorefa_resnet18'] - - -def conv3x3(in_planes, out_planes, wbit, abit, stride=1): - """3x3 convolution with padding""" - return Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, nbit_w=wbit, nbit_a=abit) - - -def conv1x1(in_planes, out_planes, wbit, abit, stride=1): - """1x1 convolution""" - return Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, nbit_w=wbit, nbit_a=abit) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, wbit, abit, sparsity_list, stride=1): - super(BasicBlock, self).__init__() - - self.bb = nn.Sequential( - conv3x3(in_planes, planes, wbit=wbit, abit=abit, stride=stride), - nn.BatchNorm2d(planes), - nn.ReLU(inplace=True), - conv3x3(planes, planes, wbit=wbit, abit=abit, stride=1), - nn.BatchNorm2d(planes), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - conv1x1(in_planes, self.expansion*planes, wbit=wbit, abit=abit, stride=stride), - nn.BatchNorm2d(self.expansion*planes,sparsity_list) - ) - - def forward(self, x): - out = self.bb(x) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, in_planes, planes, wbit, abit, stride=1): - super(Bottleneck, self).__init__() - self.conv1 = conv1x1(in_planes, planes, wbit=wbit, abit=abit, stride=1) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = conv3x3(planes, planes, wbit=wbit, abit=abit, stride=stride) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = conv1x1(planes, self.expansion*planes,wbit=wbit, abit=abit, stride=1) - self.bn3 = nn.BatchNorm2d(self.expansion*planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - conv1x1(in_planes, self.expansion*planes,wbit=wbit,abit=abit,stride=stride), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - - - - - -class ResNet(nn.Module): - def __init__(self, block, num_blocks, wbit=1, abit=1, num_classes=10): - super(ResNet, self).__init__() - self.in_planes = 64 - - self.head = nn.Sequential( - nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - ) - - self.layer1 = self._make_layer(block, 64, num_blocks[0], wbit=wbit, abit=abit, stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], wbit=wbit, abit=abit, stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], wbit=wbit, abit=abit, stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], wbit=wbit, abit=abit, stride=2) - - self.tail = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Flatten(), - nn.Linear(512*block.expansion, num_classes), - ) - - def init_w(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - return - - def _make_layer(self, block, planes, num_blocks, wbit, abit, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, wbit, abit, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - - out = self.head(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.tail(out) - return out - - - - -def dorefa_resnet18(wbit=1, abit=1): - return ResNet(BasicBlock, [2,2,2,2], wbit=wbit, abit=abit) - -def ResNet34(wbit, abit): - return ResNet(BasicBlock, [3,4,6,3], wbit=wbit, abit=abit) - -def ResNet50(wbit, abit): - return ResNet(Bottleneck, [3,4,6,3], wbit=wbit, abit=abit) - -def ResNet101(wbit, abit): - return ResNet(Bottleneck, [3,4,23,3], wbit=wbit, abit=abit) - -def ResNet152(wbit, abit): - return ResNet(Bottleneck, [3,8,36,3], wbit=wbit, abit=abit) - - +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .dorefa_layers import DOREFAConv2d as Conv +from .dorefa_layers import DOREFALinear as Linear + +__all__ = ['dorefa_resnet18'] + + +def conv3x3(in_planes, out_planes, wbit, abit, stride=1): + """3x3 convolution with padding""" + return Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, nbit_w=wbit, nbit_a=abit) + + +def conv1x1(in_planes, out_planes, wbit, abit, stride=1): + """1x1 convolution""" + return Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, nbit_w=wbit, nbit_a=abit) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, wbit, abit, sparsity_list, stride=1): + super(BasicBlock, self).__init__() + + self.bb = nn.Sequential( + conv3x3(in_planes, planes, wbit=wbit, abit=abit, stride=stride), + nn.BatchNorm2d(planes), + nn.ReLU(inplace=True), + conv3x3(planes, planes, wbit=wbit, abit=abit, stride=1), + nn.BatchNorm2d(planes), + ) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + conv1x1(in_planes, self.expansion*planes, wbit=wbit, abit=abit, stride=stride), + nn.BatchNorm2d(self.expansion*planes,sparsity_list) + ) + + def forward(self, x): + out = self.bb(x) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, wbit, abit, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(in_planes, planes, wbit=wbit, abit=abit, stride=1) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, wbit=wbit, abit=abit, stride=stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, self.expansion*planes,wbit=wbit, abit=abit, stride=1) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + conv1x1(in_planes, self.expansion*planes,wbit=wbit,abit=abit,stride=stride), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + + + + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, wbit=1, abit=1, num_classes=10): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.head = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + ) + + self.layer1 = self._make_layer(block, 64, num_blocks[0], wbit=wbit, abit=abit, stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], wbit=wbit, abit=abit, stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], wbit=wbit, abit=abit, stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], wbit=wbit, abit=abit, stride=2) + + self.tail = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + nn.Linear(512*block.expansion, num_classes), + ) + + def init_w(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + return + + def _make_layer(self, block, planes, num_blocks, wbit, abit, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, wbit, abit, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + + out = self.head(x) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = self.tail(out) + return out + + + + +def dorefa_resnet18(wbit=1, abit=1): + return ResNet(BasicBlock, [2,2,2,2], wbit=wbit, abit=abit) + +def ResNet34(wbit, abit): + return ResNet(BasicBlock, [3,4,6,3], wbit=wbit, abit=abit) + +def ResNet50(wbit, abit): + return ResNet(Bottleneck, [3,4,6,3], wbit=wbit, abit=abit) + +def ResNet101(wbit, abit): + return ResNet(Bottleneck, [3,4,23,3], wbit=wbit, abit=abit) + +def ResNet152(wbit, abit): + return ResNet(Bottleneck, [3,8,36,3], wbit=wbit, abit=abit) + + diff --git a/src/models/NN/Type_1/BinaryNet/models/xnor_layers.py b/src/models/NN/Type_1/BinaryNet/models/xnor_layers.py index 203f100..1318c2b 100755 --- a/src/models/NN/Type_1/BinaryNet/models/xnor_layers.py +++ b/src/models/NN/Type_1/BinaryNet/models/xnor_layers.py @@ -1,145 +1,145 @@ -from torch import zeros -from torch.autograd import Function -from torch.nn import Parameter, Module, Conv2d, Linear, BatchNorm1d, BatchNorm2d, Dropout, ReLU - - -__all__ = ['XNORConv2d', 'XNORLinear', 'BNConvReLU','BNLinearReLU'] - - -class BinActive(Function): - @staticmethod - def forward(ctx, input): - ctx.save_for_backward(input) - input = input.sign() - return input - - @staticmethod - def backward(ctx, grad_output): - input, = ctx.saved_tensors - grad_input = grad_output.clone() - grad_input[input.ge(1)] = 0 - grad_input[input.le(-1)] = 0 - return grad_input - - -class XNORConv2d(Module): - def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): - super(XNORConv2d, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.groups = groups - - self.conv = Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = kernel_size, stride = stride, padding = padding, groups = groups) - self.conv.weight.data.normal_(0, 0.05) - self.conv.bias.data.zero_() - - self.fp_weights = Parameter(zeros(self.conv.weight.size())) - self.fp_weights.data.copy_(self.conv.weight.data) - - def forward(self, x): - - self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) - self.fp_weights.data.clamp_(-1, 1) - self.mean_val = self.fp_weights.abs().view(self.out_channels, -1).mean(1, keepdim=True) - - self.conv.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1, 1, 1)) - x = self.conv(x) - - return x - - def update_gradient(self): - proxy = self.fp_weights.abs().sign() - proxy[self.fp_weights.data.abs()>1] = 0 - binary_grad = self.conv.weight.grad * self.mean_val.view(-1, 1, 1, 1) * proxy - - mean_grad = self.conv.weight.data.sign() * self.conv.weight.grad - mean_grad = mean_grad.view(self.out_channels, -1).mean(1).view(-1, 1, 1, 1) - mean_grad = mean_grad * self.conv.weight.data.sign() - - self.fp_weights.grad = binary_grad + mean_grad - self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) - -class BNConvReLU(Module): - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): - super(BNConvReLU, self).__init__() - self.dropout = dropout_ratio - self.a_active = BinActive.apply - - self.bn = BatchNorm2d(in_channels, eps=1e-4, momentum=0.1, affine=True) - if self.dropout !=0: - self.drop = Dropout(self.dropout, inplace=True) - self.econv = XNORConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias) - self.relu = ReLU(inplace=True) - - def forward(self, x): - - x = self.bn(x) - x = self.a_active(x) - if self.dropout !=0: - x = self.drop(x) - - x = self.econv(x) - x = self.relu(x) - return x - - -class XNORLinear(Module): - def __init__(self, in_features, out_features, bias=True): - super(XNORLinear, self).__init__() - self.in_features = in_features - self.out_features = out_features - self.bias = bias - - self.linear = Linear(in_features = in_features, out_features = out_features, bias = bias) - self.fp_weights = Parameter(zeros(self.linear.weight.size())) - self.fp_weights.data.copy_(self.linear.weight.data) - - def forward(self, x): - self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) - self.fp_weights.data.clamp_(-1, 1) - - self.mean_val = self.fp_weights.abs().view(self.out_features, -1).mean(1, keepdim=True) - - self.linear.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1)) - x = self.linear(x) - return x - - def update_gradient(self): - proxy = self.fp_weights.abs().sign() - proxy[self.fp_weights.data.abs()>1] = 0 - binary_grad = self.linear.weight.grad * self.mean_val.view(-1, 1) * proxy - - mean_grad = self.linear.weight.data.sign() * self.linear.weight.grad - mean_grad = mean_grad.view(self.out_features, -1).mean(1).view(-1, 1) - mean_grad = mean_grad * self.linear.weight.data.sign() - - self.fp_weights.grad = binary_grad + mean_grad - self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) - return - -class BNLinearReLU(Module): - def __init__(self, in_channels, out_channels, bias=True, dropout_ratio=0): - super(BNLinearReLU, self).__init__() - self.dropout = dropout_ratio - self.a_active = BinActive.apply - - self.bn = BatchNorm1d(in_channels, eps=1e-4, momentum=0.1, affine=True) - if self.dropout !=0: - self.drop = Dropout(self.dropout, inplace=True) - self.fc = XNORLinear(in_channels, out_channels, bias=bias) - self.relu = ReLU(inplace=True) - - def forward(self, x): - - x = self.bn(x) - x = self.a_active(x) - if self.dropout !=0: - x = self.drop(x) - - x = self.fc(x) - x = self.relu(x) - return x +from torch import zeros +from torch.autograd import Function +from torch.nn import Parameter, Module, Conv2d, Linear, BatchNorm1d, BatchNorm2d, Dropout, ReLU + + +__all__ = ['XNORConv2d', 'XNORLinear', 'BNConvReLU','BNLinearReLU'] + + +class BinActive(Function): + @staticmethod + def forward(ctx, input): + ctx.save_for_backward(input) + input = input.sign() + return input + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + grad_input = grad_output.clone() + grad_input[input.ge(1)] = 0 + grad_input[input.le(-1)] = 0 + return grad_input + + +class XNORConv2d(Module): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): + super(XNORConv2d, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.groups = groups + + self.conv = Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = kernel_size, stride = stride, padding = padding, groups = groups) + self.conv.weight.data.normal_(0, 0.05) + self.conv.bias.data.zero_() + + self.fp_weights = Parameter(zeros(self.conv.weight.size())) + self.fp_weights.data.copy_(self.conv.weight.data) + + def forward(self, x): + + self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) + self.fp_weights.data.clamp_(-1, 1) + self.mean_val = self.fp_weights.abs().view(self.out_channels, -1).mean(1, keepdim=True) + + self.conv.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1, 1, 1)) + x = self.conv(x) + + return x + + def update_gradient(self): + proxy = self.fp_weights.abs().sign() + proxy[self.fp_weights.data.abs()>1] = 0 + binary_grad = self.conv.weight.grad * self.mean_val.view(-1, 1, 1, 1) * proxy + + mean_grad = self.conv.weight.data.sign() * self.conv.weight.grad + mean_grad = mean_grad.view(self.out_channels, -1).mean(1).view(-1, 1, 1, 1) + mean_grad = mean_grad * self.conv.weight.data.sign() + + self.fp_weights.grad = binary_grad + mean_grad + self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) + +class BNConvReLU(Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): + super(BNConvReLU, self).__init__() + self.dropout = dropout_ratio + self.a_active = BinActive.apply + + self.bn = BatchNorm2d(in_channels, eps=1e-4, momentum=0.1, affine=True) + if self.dropout !=0: + self.drop = Dropout(self.dropout, inplace=True) + self.econv = XNORConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias) + self.relu = ReLU(inplace=True) + + def forward(self, x): + + x = self.bn(x) + x = self.a_active(x) + if self.dropout !=0: + x = self.drop(x) + + x = self.econv(x) + x = self.relu(x) + return x + + +class XNORLinear(Module): + def __init__(self, in_features, out_features, bias=True): + super(XNORLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.bias = bias + + self.linear = Linear(in_features = in_features, out_features = out_features, bias = bias) + self.fp_weights = Parameter(zeros(self.linear.weight.size())) + self.fp_weights.data.copy_(self.linear.weight.data) + + def forward(self, x): + self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) + self.fp_weights.data.clamp_(-1, 1) + + self.mean_val = self.fp_weights.abs().view(self.out_features, -1).mean(1, keepdim=True) + + self.linear.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1)) + x = self.linear(x) + return x + + def update_gradient(self): + proxy = self.fp_weights.abs().sign() + proxy[self.fp_weights.data.abs()>1] = 0 + binary_grad = self.linear.weight.grad * self.mean_val.view(-1, 1) * proxy + + mean_grad = self.linear.weight.data.sign() * self.linear.weight.grad + mean_grad = mean_grad.view(self.out_features, -1).mean(1).view(-1, 1) + mean_grad = mean_grad * self.linear.weight.data.sign() + + self.fp_weights.grad = binary_grad + mean_grad + self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) + return + +class BNLinearReLU(Module): + def __init__(self, in_channels, out_channels, bias=True, dropout_ratio=0): + super(BNLinearReLU, self).__init__() + self.dropout = dropout_ratio + self.a_active = BinActive.apply + + self.bn = BatchNorm1d(in_channels, eps=1e-4, momentum=0.1, affine=True) + if self.dropout !=0: + self.drop = Dropout(self.dropout, inplace=True) + self.fc = XNORLinear(in_channels, out_channels, bias=bias) + self.relu = ReLU(inplace=True) + + def forward(self, x): + + x = self.bn(x) + x = self.a_active(x) + if self.dropout !=0: + x = self.drop(x) + + x = self.fc(x) + x = self.relu(x) + return x diff --git a/src/models/NN/Type_1/BinaryNet/models/xnor_lenet.py b/src/models/NN/Type_1/BinaryNet/models/xnor_lenet.py index 444c1f9..ba2031b 100755 --- a/src/models/NN/Type_1/BinaryNet/models/xnor_lenet.py +++ b/src/models/NN/Type_1/BinaryNet/models/xnor_lenet.py @@ -1,45 +1,45 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['lenet5'] - -class LeNet5(nn.Module): - def __init__(self, out_classes = 10): - super(LeNet5, self).__init__() - self.features = nn.Sequential( - nn.Conv2d(1, 20, kernel_size=5, stride=1), - nn.BatchNorm2d(20, eps=1e-4, momentum=0.1, affine=False), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - XNORConv2d(20, 50, kernel_size=5, stride=1, padding=0), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Flatten(), - ) - self.classifier = nn.Sequential( - BNLinearReLU(800, 500), - nn.BatchNorm1d(500, eps=1e-4, momentum=0.1, affine=False), - nn.Linear(500, out_classes), - ) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.features(x) - x = self.classifier(x) - return x - -def lenet5(out_classes=10): - return LeNet5(out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['lenet5'] + +class LeNet5(nn.Module): + def __init__(self, out_classes = 10): + super(LeNet5, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(1, 20, kernel_size=5, stride=1), + nn.BatchNorm2d(20, eps=1e-4, momentum=0.1, affine=False), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + XNORConv2d(20, 50, kernel_size=5, stride=1, padding=0), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Flatten(), + ) + self.classifier = nn.Sequential( + BNLinearReLU(800, 500), + nn.BatchNorm1d(500, eps=1e-4, momentum=0.1, affine=False), + nn.Linear(500, out_classes), + ) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.features(x) + x = self.classifier(x) + return x + +def lenet5(out_classes=10): + return LeNet5(out_classes) diff --git a/src/models/NN/Type_1/BinaryNet/models/xnor_mlp.py b/src/models/NN/Type_1/BinaryNet/models/xnor_mlp.py index b8768e6..b4dc26a 100755 --- a/src/models/NN/Type_1/BinaryNet/models/xnor_mlp.py +++ b/src/models/NN/Type_1/BinaryNet/models/xnor_mlp.py @@ -1,44 +1,44 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['mlp'] - -class MLP(nn.Module): - def __init__(self, input_size, num_hidden_nodes, num_layers, out_classes ): - super(MLP, self).__init__() - self.num_layers = num_layers - self.classifier = nn.Sequential() - for l in range(num_layers): - if l==0: - self.classifier.add_module('layer'+str(l)+'_flatten', nn.Flatten()) - self.classifier.add_module('layer'+str(l), nn.Linear(input_size, num_hidden_nodes[l])) - self.classifier.add_module('layer'+str(l)+'_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) - self.classifier.add_module('layer'+str(l)+'_activate', nn.ReLU(inplace=True)) - elif l+1 == num_layers: - self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], out_classes)) - else: - self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], num_hidden_nodes[l])) - self.classifier.add_module('layer' + str(l) + '_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) - self.classifier.add_module('layer' + str(l) + '_activate', nn.ReLU(inplace=True)) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.classifier(x) - return x - -def mlp(input_size, num_hidden_nodes, num_layers, out_classes): - return MLP(input_size, num_hidden_nodes, num_layers, out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['mlp'] + +class MLP(nn.Module): + def __init__(self, input_size, num_hidden_nodes, num_layers, out_classes ): + super(MLP, self).__init__() + self.num_layers = num_layers + self.classifier = nn.Sequential() + for l in range(num_layers): + if l==0: + self.classifier.add_module('layer'+str(l)+'_flatten', nn.Flatten()) + self.classifier.add_module('layer'+str(l), nn.Linear(input_size, num_hidden_nodes[l])) + self.classifier.add_module('layer'+str(l)+'_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) + self.classifier.add_module('layer'+str(l)+'_activate', nn.ReLU(inplace=True)) + elif l+1 == num_layers: + self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], out_classes)) + else: + self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], num_hidden_nodes[l])) + self.classifier.add_module('layer' + str(l) + '_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) + self.classifier.add_module('layer' + str(l) + '_activate', nn.ReLU(inplace=True)) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.classifier(x) + return x + +def mlp(input_size, num_hidden_nodes, num_layers, out_classes): + return MLP(input_size, num_hidden_nodes, num_layers, out_classes) diff --git a/src/models/NN/Type_1/BinaryNet/models/xnor_nin.py b/src/models/NN/Type_1/BinaryNet/models/xnor_nin.py index 146cab5..952ca07 100755 --- a/src/models/NN/Type_1/BinaryNet/models/xnor_nin.py +++ b/src/models/NN/Type_1/BinaryNet/models/xnor_nin.py @@ -1,55 +1,55 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['nin'] - -class NIN(nn.Module): - def __init__(self, out_class=10): - super(NIN, self).__init__() - - - self.features = nn.Sequential( - nn.Conv2d(3, 192, kernel_size = 5, stride = 1, padding = 2), - nn.BatchNorm2d(192, eps=1e-4, momentum = 0.1, affine = False), - nn.ReLU(inplace=True), - - BNConvReLU(192, 160, kernel_size=1, stride=1, padding=0), - BNConvReLU(160, 96, kernel_size=1, stride=1, padding=0), - nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), - - BNConvReLU(96, 192, kernel_size=5, stride=1, padding=2, dropout_ratio=0.5), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - nn.AvgPool2d(kernel_size = 3, stride = 2, padding = 1), - - BNConvReLU(192, 192, kernel_size=3, stride=1, padding=1, dropout_ratio=0.5), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - - nn.BatchNorm2d(192, eps = 1e-4, momentum = 0.1, affine = False), - nn.Conv2d(192, out_class, kernel_size = 1, stride = 1, padding = 0), - nn.ReLU(inplace=True), - nn.AdaptiveAvgPool2d(1), - nn.Flatten() - ) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.features(x) - return x - -def nin(out_classes=10): - return NIN(out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['nin'] + +class NIN(nn.Module): + def __init__(self, out_class=10): + super(NIN, self).__init__() + + + self.features = nn.Sequential( + nn.Conv2d(3, 192, kernel_size = 5, stride = 1, padding = 2), + nn.BatchNorm2d(192, eps=1e-4, momentum = 0.1, affine = False), + nn.ReLU(inplace=True), + + BNConvReLU(192, 160, kernel_size=1, stride=1, padding=0), + BNConvReLU(160, 96, kernel_size=1, stride=1, padding=0), + nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), + + BNConvReLU(96, 192, kernel_size=5, stride=1, padding=2, dropout_ratio=0.5), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + nn.AvgPool2d(kernel_size = 3, stride = 2, padding = 1), + + BNConvReLU(192, 192, kernel_size=3, stride=1, padding=1, dropout_ratio=0.5), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + + nn.BatchNorm2d(192, eps = 1e-4, momentum = 0.1, affine = False), + nn.Conv2d(192, out_class, kernel_size = 1, stride = 1, padding = 0), + nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d(1), + nn.Flatten() + ) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.features(x) + return x + +def nin(out_classes=10): + return NIN(out_classes) diff --git a/src/models/NN/Type_1/BinaryNet/requirements.txt b/src/models/NN/Type_1/BinaryNet/requirements.txt index dc3af0c..40ebca7 100755 --- a/src/models/NN/Type_1/BinaryNet/requirements.txt +++ b/src/models/NN/Type_1/BinaryNet/requirements.txt @@ -1,5 +1,5 @@ -torch -torchvision -tqdm -pyyaml - +torch +torchvision +tqdm +pyyaml + diff --git a/src/models/NN/Type_1/BinaryNet/yml/bnn_caffenet_cifar10.yml b/src/models/NN/Type_1/BinaryNet/yml/bnn_caffenet_cifar10.yml index 99fab9d..2c352c2 100755 --- a/src/models/NN/Type_1/BinaryNet/yml/bnn_caffenet_cifar10.yml +++ b/src/models/NN/Type_1/BinaryNet/yml/bnn_caffenet_cifar10.yml @@ -1,17 +1,17 @@ -no_cuda: False -checkpoint: "results/bnn_caffenet_cifar10" -filename: null -pretrained: null -bin_type: 'bnn' - -model : "bnn_caffenet" -save_path: "results/bnn_caffenet_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'sgd' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/bnn_caffenet_cifar10" +filename: null +pretrained: null +bin_type: 'bnn' + +model : "bnn_caffenet" +save_path: "results/bnn_caffenet_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'sgd' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_1/BinaryNet/yml/dorefa_resnet_cifar10.yml b/src/models/NN/Type_1/BinaryNet/yml/dorefa_resnet_cifar10.yml index dffecfb..29f5c73 100755 --- a/src/models/NN/Type_1/BinaryNet/yml/dorefa_resnet_cifar10.yml +++ b/src/models/NN/Type_1/BinaryNet/yml/dorefa_resnet_cifar10.yml @@ -1,18 +1,18 @@ -no_cuda: False -checkpoint: "results/dorefa_resnet_cifar10" -filename: null -pretrained: null - -bin_type: 'dorefa' - -model : "dorefa_resnet18" -save_path: "results/dorefa_resnet_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'sgd' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/dorefa_resnet_cifar10" +filename: null +pretrained: null + +bin_type: 'dorefa' + +model : "dorefa_resnet18" +save_path: "results/dorefa_resnet_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'sgd' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_1/BinaryNet/yml/lenet_mnist.yml b/src/models/NN/Type_1/BinaryNet/yml/lenet_mnist.yml index 9226466..5eb5c0b 100755 --- a/src/models/NN/Type_1/BinaryNet/yml/lenet_mnist.yml +++ b/src/models/NN/Type_1/BinaryNet/yml/lenet_mnist.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/lenet_mnist" -filename: null -pretrained: null -bin_type: "xnor" -model : "lenet5" -save_path: "results/lenet_mnist" -dataset : "mnist" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [100, 200] -epochs: 300 - +no_cuda: False +checkpoint: "results/lenet_mnist" +filename: null +pretrained: null +bin_type: "xnor" +model : "lenet5" +save_path: "results/lenet_mnist" +dataset : "mnist" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [100, 200] +epochs: 300 + diff --git a/src/models/NN/Type_1/BinaryNet/yml/mlp_mnist.yml b/src/models/NN/Type_1/BinaryNet/yml/mlp_mnist.yml index 98511c9..94afe32 100755 --- a/src/models/NN/Type_1/BinaryNet/yml/mlp_mnist.yml +++ b/src/models/NN/Type_1/BinaryNet/yml/mlp_mnist.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/mlp_mnist" -filename: null -pretrained: null -bin_type: "xnor" -model : "mlp" -save_path: "results/mlp_mnist" -dataset : "mnist" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [100, 200] -epochs: 300 - +no_cuda: False +checkpoint: "results/mlp_mnist" +filename: null +pretrained: null +bin_type: "xnor" +model : "mlp" +save_path: "results/mlp_mnist" +dataset : "mnist" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [100, 200] +epochs: 300 + diff --git a/src/models/NN/Type_1/BinaryNet/yml/nin_cifar10.yml b/src/models/NN/Type_1/BinaryNet/yml/nin_cifar10.yml index dae28c6..c45127c 100755 --- a/src/models/NN/Type_1/BinaryNet/yml/nin_cifar10.yml +++ b/src/models/NN/Type_1/BinaryNet/yml/nin_cifar10.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/nin_cifar10" -filename: null -pretrained: null -bin_type: 'xnor' -model : "nin" -save_path: "results/nin_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/nin_cifar10" +filename: null +pretrained: null +bin_type: 'xnor' +model : "nin" +save_path: "results/nin_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_1/dedicated_p4.py b/src/models/NN/Type_1/dedicated_p4.py index 5a7676d..39eb6d7 100755 --- a/src/models/NN/Type_1/dedicated_p4.py +++ b/src/models/NN/Type_1/dedicated_p4.py @@ -1,305 +1,305 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) - +" = meta.middle_c" +str(int(compare[0])) +" - meta.middle_c"+str(int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"meta.result = "+str(int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - # print(con_list, c_n) - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" - +(10-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_hidden_nodes'] = config_file['num hidden nodes'] - config['num_layers'] = config_file["number of layers"] - config['num_classes'] = config_file["number of classes"] - config['model'] = config_file['model'] - config['width'] = config_file["width of inputs"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(10) + "> middle_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c + 1, config['num_classes']): - headers.write(" bit<" + str(10) + "> compare" + str(c) + "_" + str(c1) + ";\n") - - headers.write(" bit<64> bnnInput;\n" - " bit<64> XNOROutput;\n" - " bit<64> NextLayerInput;\n" - " bit<1> activated;\n" - " bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - ingress.write(" meta.bnnInput = 0;\n" - " meta.XNOROutput = 0;\n" - " meta.NextLayerInput = 0;\n" - " BuildInput();\n\n") - # " hdr.Planter.result = (bit<32>)meta.bnnInput;\n") - # " bit<32> debug = (bit<32>)meta.bnnInput;\n") - - count = 0 - for l in range(config['num_layers']): - ingress.write(" Layer"+str(l)+"_Process("+str(int(count))+");\n") - if l + 1 != config['num_layers']: - ingress.write(" meta.bnnInput = meta.NextLayerInput;\n" - " meta.NextLayerInput = 0;\n") - ingress.write("\n") - if l+1== config['num_layers']: - break - count+=config['num_hidden_nodes'][l] - - ingress.write(" compare();\n") - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - - - ingress.write(" register>(1024) weights;\n" - # " //bit<8> count = 0;\n" - # " bit<1> activated = 0;\n" - " bit<128> m1 = 0x55555555555555555555555555555555;\n" - " bit<128> m2 = 0x33333333333333333333333333333333;\n" - " bit<128> m4 = 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f;\n" - " bit<128> m8 = 0x00ff00ff00ff00ff00ff00ff00ff00ff;\n" - " bit<128> m16= 0x0000ffff0000ffff0000ffff0000ffff;\n" - " bit<128> m32= 0x00000000ffffffff00000000ffffffff;\n" - " bit<128> m64= 0x0000000000000000ffffffffffffffff;\n\n") - - - - ingress.write(" action XNOR(bit<64> weight){\n" - " meta.XNOROutput = weight^meta.bnnInput;\n" - " meta.XNOROutput = ~meta.XNOROutput;\n" - " }\n\n") - - for l in range(config['num_layers']): - if l+1 == config['num_layers']: - break - if l==0: - threshold = np.sum(config['width']) / 2 - else: - threshold = config['num_hidden_nodes'][l-1] / 2 - ingress.write(" action BitCount_l"+str(l)+"(bit<64> bitInput){\n" - " bit<128> x= (bit<128>)bitInput;\n" - " x = (x & m1 ) + ((x >> 1) & m1 );\n" - " x = (x & m2 ) + ((x >> 2) & m2 );\n" - " x = (x & m4 ) + ((x >> 4) & m4 );\n" - " x = (x & m8 ) + ((x >> 8) & m8 );\n" - " x = (x & m16) + ((x >> 16) & m16);\n" - " x = (x & m32) + ((x >> 32) & m32);\n" - " x = (x & m64) + ((x >> 64) & m64);\n" - " meta.activated = (x>"+str(int(np.floor(threshold)))+") ? (bit<1>)1 : 0;\n" - " meta.NextLayerInput = meta.NextLayerInput<<1;\n" - " meta.NextLayerInput = meta.NextLayerInput + (bit<64>)meta.activated;\n" - " }\n\n") - - for c in range(config['num_classes']): - ingress.write(" action BitCount_c"+str(c)+"(bit<64> bitInput){\n" - " bit<128> x= (bit<128>)bitInput;\n" - " x = (x & m1 ) + ((x >> 1) & m1 );\n" - " x = (x & m2 ) + ((x >> 2) & m2 );\n" - " x = (x & m4 ) + ((x >> 4) & m4 );\n" - " x = (x & m8 ) + ((x >> 8) & m8 );\n" - " x = (x & m16) + ((x >> 16) & m16);\n" - " x = (x & m32) + ((x >> 32) & m32);\n" - " x = (x & m64) + ((x >> 64) & m64);\n" - " meta.middle_c"+str(c)+" = (bit<10>) x;\n" - " }\n\n") - - for l in range(config['num_layers']): - if l == 0: - num_zeros = int(128 - np.sum(config['width'])) - num_ones = int(np.sum(config['width'])) - bound = int("0b"+"0"*num_zeros+"1"*num_ones,2) - else: - num_zeros = int(128 - config['num_hidden_nodes'][l - 1]) - num_ones = int(config['num_hidden_nodes'][l - 1]) - bound = int("0b" + "0" * num_zeros + "1" * num_ones, 2) - if l==0: - ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" - # " bit < "+str(int(np.sum(config['width'])))+" > weight = 0;\n" - " bit <64> weight = 0;\n") - # " meta.NextLayerInput = 0;\n") - for h in range(config['num_hidden_nodes'][l]): - ingress.write(" weights.read( weight, (bit<32>)offset+"+str(h)+");\n" - " XNOR(weight);\n" - " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" - " BitCount_l"+str(l)+"(meta.XNOROutput);\n") - ingress.write(" }\n\n") - elif l+1==config['num_layers']: - ingress.write(" action Layer" + str(l) + "_Process(bit <10> offset){ \n" - " bit <64> weight = 0;\n") - # " meta.NextLayerInput = 0;\n") - for c in range(config['num_classes']): - ingress.write(" weights.read( weight, (bit<32>)offset+" + str(c) + ");\n" - " XNOR(weight);\n" - " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" - " BitCount_c"+str(c)+"(meta.XNOROutput);\n") - ingress.write(" }\n\n") - else: - ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" - " bit <64> weight = 0;\n" - " meta.NextLayerInput = 0;\n") - for h in range(config['num_hidden_nodes'][l]): - ingress.write(" weights.read(weight, (bit<32>)offset+"+str(h)+");\n" - " XNOR(weight);\n" - " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" - " BitCount_l"+str(l)+"(meta.XNOROutput);\n") - ingress.write(" }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - ingress.write(" action BuildInput(){\n") - for f in range(config['num_features']): - if f+1) meta.feature"+str(f)+") << "+str(int(config['width'][f+1]))+";\n") - else: - ingress.write(" meta.bnnInput = (meta.bnnInput + (bit <64>) meta.feature" + str(f) + ") ;\n") - - ingress.write(" }\n\n") - - ################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for idx in range(len(Exact_Table['weights'])): - file.write("register_write SwitchIngress.weights "+str(idx)+" "+str(Exact_Table['weights'][idx])+"\n") - - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) + +" = meta.middle_c" +str(int(compare[0])) +" - meta.middle_c"+str(int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"meta.result = "+str(int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + # print(con_list, c_n) + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" + +(10-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_hidden_nodes'] = config_file['num hidden nodes'] + config['num_layers'] = config_file["number of layers"] + config['num_classes'] = config_file["number of classes"] + config['model'] = config_file['model'] + config['width'] = config_file["width of inputs"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(10) + "> middle_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c + 1, config['num_classes']): + headers.write(" bit<" + str(10) + "> compare" + str(c) + "_" + str(c1) + ";\n") + + headers.write(" bit<64> bnnInput;\n" + " bit<64> XNOROutput;\n" + " bit<64> NextLayerInput;\n" + " bit<1> activated;\n" + " bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + ingress.write(" meta.bnnInput = 0;\n" + " meta.XNOROutput = 0;\n" + " meta.NextLayerInput = 0;\n" + " BuildInput();\n\n") + # " hdr.Planter.result = (bit<32>)meta.bnnInput;\n") + # " bit<32> debug = (bit<32>)meta.bnnInput;\n") + + count = 0 + for l in range(config['num_layers']): + ingress.write(" Layer"+str(l)+"_Process("+str(int(count))+");\n") + if l + 1 != config['num_layers']: + ingress.write(" meta.bnnInput = meta.NextLayerInput;\n" + " meta.NextLayerInput = 0;\n") + ingress.write("\n") + if l+1== config['num_layers']: + break + count+=config['num_hidden_nodes'][l] + + ingress.write(" compare();\n") + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + + + ingress.write(" register>(1024) weights;\n" + # " //bit<8> count = 0;\n" + # " bit<1> activated = 0;\n" + " bit<128> m1 = 0x55555555555555555555555555555555;\n" + " bit<128> m2 = 0x33333333333333333333333333333333;\n" + " bit<128> m4 = 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f;\n" + " bit<128> m8 = 0x00ff00ff00ff00ff00ff00ff00ff00ff;\n" + " bit<128> m16= 0x0000ffff0000ffff0000ffff0000ffff;\n" + " bit<128> m32= 0x00000000ffffffff00000000ffffffff;\n" + " bit<128> m64= 0x0000000000000000ffffffffffffffff;\n\n") + + + + ingress.write(" action XNOR(bit<64> weight){\n" + " meta.XNOROutput = weight^meta.bnnInput;\n" + " meta.XNOROutput = ~meta.XNOROutput;\n" + " }\n\n") + + for l in range(config['num_layers']): + if l+1 == config['num_layers']: + break + if l==0: + threshold = np.sum(config['width']) / 2 + else: + threshold = config['num_hidden_nodes'][l-1] / 2 + ingress.write(" action BitCount_l"+str(l)+"(bit<64> bitInput){\n" + " bit<128> x= (bit<128>)bitInput;\n" + " x = (x & m1 ) + ((x >> 1) & m1 );\n" + " x = (x & m2 ) + ((x >> 2) & m2 );\n" + " x = (x & m4 ) + ((x >> 4) & m4 );\n" + " x = (x & m8 ) + ((x >> 8) & m8 );\n" + " x = (x & m16) + ((x >> 16) & m16);\n" + " x = (x & m32) + ((x >> 32) & m32);\n" + " x = (x & m64) + ((x >> 64) & m64);\n" + " meta.activated = (x>"+str(int(np.floor(threshold)))+") ? (bit<1>)1 : 0;\n" + " meta.NextLayerInput = meta.NextLayerInput<<1;\n" + " meta.NextLayerInput = meta.NextLayerInput + (bit<64>)meta.activated;\n" + " }\n\n") + + for c in range(config['num_classes']): + ingress.write(" action BitCount_c"+str(c)+"(bit<64> bitInput){\n" + " bit<128> x= (bit<128>)bitInput;\n" + " x = (x & m1 ) + ((x >> 1) & m1 );\n" + " x = (x & m2 ) + ((x >> 2) & m2 );\n" + " x = (x & m4 ) + ((x >> 4) & m4 );\n" + " x = (x & m8 ) + ((x >> 8) & m8 );\n" + " x = (x & m16) + ((x >> 16) & m16);\n" + " x = (x & m32) + ((x >> 32) & m32);\n" + " x = (x & m64) + ((x >> 64) & m64);\n" + " meta.middle_c"+str(c)+" = (bit<10>) x;\n" + " }\n\n") + + for l in range(config['num_layers']): + if l == 0: + num_zeros = int(128 - np.sum(config['width'])) + num_ones = int(np.sum(config['width'])) + bound = int("0b"+"0"*num_zeros+"1"*num_ones,2) + else: + num_zeros = int(128 - config['num_hidden_nodes'][l - 1]) + num_ones = int(config['num_hidden_nodes'][l - 1]) + bound = int("0b" + "0" * num_zeros + "1" * num_ones, 2) + if l==0: + ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" + # " bit < "+str(int(np.sum(config['width'])))+" > weight = 0;\n" + " bit <64> weight = 0;\n") + # " meta.NextLayerInput = 0;\n") + for h in range(config['num_hidden_nodes'][l]): + ingress.write(" weights.read( weight, (bit<32>)offset+"+str(h)+");\n" + " XNOR(weight);\n" + " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" + " BitCount_l"+str(l)+"(meta.XNOROutput);\n") + ingress.write(" }\n\n") + elif l+1==config['num_layers']: + ingress.write(" action Layer" + str(l) + "_Process(bit <10> offset){ \n" + " bit <64> weight = 0;\n") + # " meta.NextLayerInput = 0;\n") + for c in range(config['num_classes']): + ingress.write(" weights.read( weight, (bit<32>)offset+" + str(c) + ");\n" + " XNOR(weight);\n" + " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" + " BitCount_c"+str(c)+"(meta.XNOROutput);\n") + ingress.write(" }\n\n") + else: + ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" + " bit <64> weight = 0;\n" + " meta.NextLayerInput = 0;\n") + for h in range(config['num_hidden_nodes'][l]): + ingress.write(" weights.read(weight, (bit<32>)offset+"+str(h)+");\n" + " XNOR(weight);\n" + " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" + " BitCount_l"+str(l)+"(meta.XNOROutput);\n") + ingress.write(" }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + ingress.write(" action BuildInput(){\n") + for f in range(config['num_features']): + if f+1) meta.feature"+str(f)+") << "+str(int(config['width'][f+1]))+";\n") + else: + ingress.write(" meta.bnnInput = (meta.bnnInput + (bit <64>) meta.feature" + str(f) + ") ;\n") + + ingress.write(" }\n\n") + + ################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for idx in range(len(Exact_Table['weights'])): + file.write("register_write SwitchIngress.weights "+str(idx)+" "+str(Exact_Table['weights'][idx])+"\n") + + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + diff --git a/src/models/NN/Type_1/readme.md b/src/models/NN/Type_1/readme.md index 955597d..0a8514a 100644 --- a/src/models/NN/Type_1/readme.md +++ b/src/models/NN/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for NN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for NN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/NN/Type_1/table_generator.py b/src/models/NN/Type_1/table_generator.py index 66a69bb..3aedb4a 100755 --- a/src/models/NN/Type_1/table_generator.py +++ b/src/models/NN/Type_1/table_generator.py @@ -1,305 +1,305 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import math - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os -from src.functions.logic_gates import * -import time - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - - -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -from src.models.NN.Type_1.BinaryNet.models.xnor_layers import * -from src.models.NN.Type_1.BinaryNet.models.xnor_mlp import * -from sklearn.metrics import * -import torch.nn as nn -from torch.autograd import Variable as V -from torch.utils.data import DataLoader, Dataset, TensorDataset -import torch -from src.models.NN.Type_1.BinaryNet.models import * -from src.models.NN.Type_1.BinaryNet.classifiers.xnor_classifier import * -from src.functions.numeric_conversion import * - - -def bintoint(binary): - number = 0 - for b in binary: - number = (2 * number) + int(b) - return number - - -def convert_weight_to_register_data(weight_data): - weight = [] - weights = [] - for i in weight_data: - for j in i: - if j < 0: - weight.append(0) - else: - weight.append(1) - weights.append(bintoint(weight)) - weight.clear() - return weights - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') - Planter_config['model config']['batch size'] = int(input('- Model batch size? (default = 10) ') or '10') - Planter_config['model config']['num epoch'] = int(input('- Number of training epoch? (default = 15) ') or '15') - Planter_config['model config']['number of layers'] = int(input('- Number of layers? (default = 3) ') or '3') - Planter_config['model config']['num hidden nodes'] = [] - for l in range(Planter_config['model config']['number of layers']-1): - Planter_config['model config']['num hidden nodes'] += [int(input('- Number of hidden nodes for layer '+str(l+1)+'? (default = 56) ') or '56')] - - num_layers = Planter_config['model config']['number of layers'] - num_hidden_nodes = Planter_config['model config']['num hidden nodes'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - learning_rate = Planter_config['model config']['learning rate'] - batch_size = Planter_config['model config']['batch size'] - num_epoch = Planter_config['model config']['num epoch'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - width = [] - for f in range(num_features): - width += [np.ceil(math.log(feature_max[f],2))] - width_row = int(np.sum(width)) - - - total_count = np.shape(train_X.values)[0] + np.shape(test_X.values)[0] - count = 0 - - train_X_new = [] - test_X_new = [] - for i in range(np.shape(train_X.values)[0]): - flag = 0 - row = (np.zeros(int(width_row))) - for f in range(num_features): - code = ten_to_bin(train_X.values[i][f],width[f]) - for d in range(int(width[f])): - row[flag] = int(code[d]) - flag += 1 - train_X_new += [row] - # ====================== - count += 1 - percent = int(np.ceil(50 * count / total_count)) - print('\rProcessing the raw Data [' + percent * '#' + (50 - percent) * '-' + '] ' + str( int(np.round(100 * count / total_count))) + "%", end="") - # ====================== - train_X_new = np.array(train_X_new) - - for i in range(np.shape(test_X.values)[0]): - flag = 0 - row = (np.zeros(int(width_row))) - for f in range(num_features): - code = ten_to_bin(test_X.values[i][f],width[f]) - for d in range(int(width[f])): - row[flag] = int(code[d]) - flag += 1 - test_X_new += [row] - # ====================== - count += 1 - percent = int(np.ceil(50 * count / total_count)) - print('\rProcessing the raw data [' + percent * '#' + (50 - percent) * '-' + '] ' + str(int(np.round(100 * count / total_count))) + "%", end="") - # ====================== - test_X_new = np.array(test_X_new) - print('\nData set is ready') - - - ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 - tensor_x = torch.from_numpy(train_X_new.astype(np.float32)) - tensor_y = torch.LongTensor(train_y.astype(np.float32)) - test_X = torch.from_numpy(test_X_new.astype(np.float32)) - test_y = torch.LongTensor(test_y.astype(np.float32)) - my_train_dataset = TensorDataset(tensor_x, tensor_y) - my_test_dataset = TensorDataset(test_X, test_y) - train_loader = DataLoader(my_train_dataset, batch_size=batch_size, shuffle=False) - test_loader = DataLoader(my_test_dataset, batch_size=batch_size, shuffle=False) - - cuda = torch.cuda.is_available() - device = torch.device('cuda' if cuda else 'cpu') - torch.manual_seed(0) - if cuda: - torch.backends.cudnn.deterministic = True - torch.cuda.manual_seed(0) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - model = eval('mlp')(width_row, num_hidden_nodes, num_layers, num_classes) - model.to(device) - - classification = XnorClassifier(model, train_loader, test_loader, device) - - criterion = torch.nn.CrossEntropyLoss() - criterion.to(device) - - if hasattr(model, 'init_w'): - model.init_w() - - - optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) - - scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 200] , gamma=0.1) - - classification.train(criterion, optimizer, num_epoch, scheduler, Planter_config['directory config']['work']+'/src/temp/mlp') - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - Exact_Table = {} - - Exact_Table['weights'] = [] - for l in range(num_layers): - Exact_Table['weights'] += convert_weight_to_register_data(model.classifier._modules['layer'+str(l)].weight.detach().numpy()) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "NN" - Planter_config['p4 config']["num hidden nodes"] = num_hidden_nodes - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of layers"] = num_layers - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["width of inputs"] = width - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return test_y.tolist() - -def bits_on_count(x): - return sum(c=='1' for c in bin(x)) - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_hidden_nodes = Planter_config['p4 config']["num hidden nodes"] - num_layers = Planter_config['p4 config']["number of layers"] - width = Planter_config['p4 config']["width of inputs"] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - - correct = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - input = '' - for f in range(num_features): - input += ten_to_bin(test_X.values[i][f],width[f]) - input = int(input, 2) - # print(' - ', input) - node_num = 0 - for l in range(num_layers): - if l == 0: - num_bits = int(np.sum(width)) - else: - num_bits = int(num_hidden_nodes[l - 1]) - next_layer_input = '' - if l+1 != num_layers: - for n in range(num_hidden_nodes[l]): - # value = XNOR(input,Exact_Table['weights'][node_num]) - value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) - # print(ten_to_bin(value,120)) - # if n+1 == num_hidden_nodes[l]: - # if n==0 and l ==0: - # print('- ',input,' xnor ', Exact_Table['weights'][node_num], ' = ', value) - value = bits_on_count(value) - # print(value) - node_num += 1 - if l==0: - threshold = np.floor(np.sum(width)/2) - else: - threshold = np.floor(num_hidden_nodes[l-1]/2) - if value> threshold: - next_layer_input += '1' - else: - next_layer_input += '0' - - - input = int(next_layer_input,2) - - else: - result = np.zeros(num_classes).tolist() - for c in range(num_classes): - # value = XNOR(input, Exact_Table['weights'][node_num]) - value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) - value = bits_on_count(value) - result[c] = copy.deepcopy(value) - node_num += 1 - - - switch_prediction = result.index(np.max(result)) - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, M/A format macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), accuracy_score(switch_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of NN is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import math + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os +from src.functions.logic_gates import * +import time + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + + +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +from src.models.NN.Type_1.BinaryNet.models.xnor_layers import * +from src.models.NN.Type_1.BinaryNet.models.xnor_mlp import * +from sklearn.metrics import * +import torch.nn as nn +from torch.autograd import Variable as V +from torch.utils.data import DataLoader, Dataset, TensorDataset +import torch +from src.models.NN.Type_1.BinaryNet.models import * +from src.models.NN.Type_1.BinaryNet.classifiers.xnor_classifier import * +from src.functions.numeric_conversion import * + + +def bintoint(binary): + number = 0 + for b in binary: + number = (2 * number) + int(b) + return number + + +def convert_weight_to_register_data(weight_data): + weight = [] + weights = [] + for i in weight_data: + for j in i: + if j < 0: + weight.append(0) + else: + weight.append(1) + weights.append(bintoint(weight)) + weight.clear() + return weights + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') + Planter_config['model config']['batch size'] = int(input('- Model batch size? (default = 10) ') or '10') + Planter_config['model config']['num epoch'] = int(input('- Number of training epoch? (default = 15) ') or '15') + Planter_config['model config']['number of layers'] = int(input('- Number of layers? (default = 3) ') or '3') + Planter_config['model config']['num hidden nodes'] = [] + for l in range(Planter_config['model config']['number of layers']-1): + Planter_config['model config']['num hidden nodes'] += [int(input('- Number of hidden nodes for layer '+str(l+1)+'? (default = 56) ') or '56')] + + num_layers = Planter_config['model config']['number of layers'] + num_hidden_nodes = Planter_config['model config']['num hidden nodes'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + learning_rate = Planter_config['model config']['learning rate'] + batch_size = Planter_config['model config']['batch size'] + num_epoch = Planter_config['model config']['num epoch'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + width = [] + for f in range(num_features): + width += [np.ceil(math.log(feature_max[f],2))] + width_row = int(np.sum(width)) + + + total_count = np.shape(train_X.values)[0] + np.shape(test_X.values)[0] + count = 0 + + train_X_new = [] + test_X_new = [] + for i in range(np.shape(train_X.values)[0]): + flag = 0 + row = (np.zeros(int(width_row))) + for f in range(num_features): + code = ten_to_bin(train_X.values[i][f],width[f]) + for d in range(int(width[f])): + row[flag] = int(code[d]) + flag += 1 + train_X_new += [row] + # ====================== + count += 1 + percent = int(np.ceil(50 * count / total_count)) + print('\rProcessing the raw Data [' + percent * '#' + (50 - percent) * '-' + '] ' + str( int(np.round(100 * count / total_count))) + "%", end="") + # ====================== + train_X_new = np.array(train_X_new) + + for i in range(np.shape(test_X.values)[0]): + flag = 0 + row = (np.zeros(int(width_row))) + for f in range(num_features): + code = ten_to_bin(test_X.values[i][f],width[f]) + for d in range(int(width[f])): + row[flag] = int(code[d]) + flag += 1 + test_X_new += [row] + # ====================== + count += 1 + percent = int(np.ceil(50 * count / total_count)) + print('\rProcessing the raw data [' + percent * '#' + (50 - percent) * '-' + '] ' + str(int(np.round(100 * count / total_count))) + "%", end="") + # ====================== + test_X_new = np.array(test_X_new) + print('\nData set is ready') + + + ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 + tensor_x = torch.from_numpy(train_X_new.astype(np.float32)) + tensor_y = torch.LongTensor(train_y.astype(np.float32)) + test_X = torch.from_numpy(test_X_new.astype(np.float32)) + test_y = torch.LongTensor(test_y.astype(np.float32)) + my_train_dataset = TensorDataset(tensor_x, tensor_y) + my_test_dataset = TensorDataset(test_X, test_y) + train_loader = DataLoader(my_train_dataset, batch_size=batch_size, shuffle=False) + test_loader = DataLoader(my_test_dataset, batch_size=batch_size, shuffle=False) + + cuda = torch.cuda.is_available() + device = torch.device('cuda' if cuda else 'cpu') + torch.manual_seed(0) + if cuda: + torch.backends.cudnn.deterministic = True + torch.cuda.manual_seed(0) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + model = eval('mlp')(width_row, num_hidden_nodes, num_layers, num_classes) + model.to(device) + + classification = XnorClassifier(model, train_loader, test_loader, device) + + criterion = torch.nn.CrossEntropyLoss() + criterion.to(device) + + if hasattr(model, 'init_w'): + model.init_w() + + + optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) + + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 200] , gamma=0.1) + + classification.train(criterion, optimizer, num_epoch, scheduler, Planter_config['directory config']['work']+'/src/temp/mlp') + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + Exact_Table = {} + + Exact_Table['weights'] = [] + for l in range(num_layers): + Exact_Table['weights'] += convert_weight_to_register_data(model.classifier._modules['layer'+str(l)].weight.detach().numpy()) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "NN" + Planter_config['p4 config']["num hidden nodes"] = num_hidden_nodes + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of layers"] = num_layers + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["width of inputs"] = width + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return test_y.tolist() + +def bits_on_count(x): + return sum(c=='1' for c in bin(x)) + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_hidden_nodes = Planter_config['p4 config']["num hidden nodes"] + num_layers = Planter_config['p4 config']["number of layers"] + width = Planter_config['p4 config']["width of inputs"] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + + correct = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + input = '' + for f in range(num_features): + input += ten_to_bin(test_X.values[i][f],width[f]) + input = int(input, 2) + # print(' - ', input) + node_num = 0 + for l in range(num_layers): + if l == 0: + num_bits = int(np.sum(width)) + else: + num_bits = int(num_hidden_nodes[l - 1]) + next_layer_input = '' + if l+1 != num_layers: + for n in range(num_hidden_nodes[l]): + # value = XNOR(input,Exact_Table['weights'][node_num]) + value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) + # print(ten_to_bin(value,120)) + # if n+1 == num_hidden_nodes[l]: + # if n==0 and l ==0: + # print('- ',input,' xnor ', Exact_Table['weights'][node_num], ' = ', value) + value = bits_on_count(value) + # print(value) + node_num += 1 + if l==0: + threshold = np.floor(np.sum(width)/2) + else: + threshold = np.floor(num_hidden_nodes[l-1]/2) + if value> threshold: + next_layer_input += '1' + else: + next_layer_input += '0' + + + input = int(next_layer_input,2) + + else: + result = np.zeros(num_classes).tolist() + for c in range(num_classes): + # value = XNOR(input, Exact_Table['weights'][node_num]) + value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) + value = bits_on_count(value) + result[c] = copy.deepcopy(value) + node_num += 1 + + + switch_prediction = result.index(np.max(result)) + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, M/A format macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), accuracy_score(switch_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of NN is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) print('\n', result) \ No newline at end of file diff --git a/src/models/NN/Type_2/BinaryNet/README.md b/src/models/NN/Type_2/BinaryNet/README.md index 2697020..d1c6639 100755 --- a/src/models/NN/Type_2/BinaryNet/README.md +++ b/src/models/NN/Type_2/BinaryNet/README.md @@ -1,52 +1,52 @@ -# Binary Neural Networks on PyTorch - -![Binarization](https://github.com/lucamocerino/Binary-Neural-Networks-PyTorch-1.0/blob/master/bin.png) - - -This repository implements three popular papers that introduced the concept of Binary Neural Networks: -- **XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks**: https://arxiv.org/abs/1603.05279. -- **Binarized Neural Networks** :https://papers.nips.cc/paper/6573-binarized-neural-networks -- **DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients** :https://arxiv.org/abs/1606.06160 - - - -The project is organized as follows: - - - **models** folder contains CNN models (simple mlp, Network-in-Network, LeNet5, etc.) - - **classifiers/{type}_classifier.py** contains the test and train procedures; where type = {bnn, xnor, dorefa} - - **models/{type}_layers.py** contains the binarylayers implementation (binary activation, binary conv and fully-connected layers, gradient update); where type = {bnn, xnor, dorefa} - - **yml** folder contains configuration files with hyperparameters - - **main.py** represents the entry file - -### Installation - -All packages are in *requirement.txt* -Install the dependencies: - -```sh -pip install -r requirements.txt -``` -### Basic usage -```sh -$ python main.py app:{yml_file} -``` -### Example -Network-in-Network on CIFAR10 dataset. All hyper parameters are in .yml file. -```sh -$ python main.py app:yml/nin_cifar10.yml -``` -## Related Applications -If you find this code useful in your research, please consider citing one of the works in this section. - - - Fast and Accurate Inference on Microcontrollers With Boosted Cooperative Convolutional Neural Networks (BC-Net) https://ieeexplore.ieee.org/abstract/document/9275360 - - CoopNet: Cooperative Convolutional Neural Network for Low-Power MCUs https://ieeexplore.ieee.org/abstract/document/8964993 - - TentacleNet: A Pseudo-Ensemble Template for Accurate Binary Convolutional Neural Networks https://ieeexplore.ieee.org/abstract/document/9073982/ - -License ----- - -MIT - - - - +# Binary Neural Networks on PyTorch + +![Binarization](https://github.com/lucamocerino/Binary-Neural-Networks-PyTorch-1.0/blob/master/bin.png) + + +This repository implements three popular papers that introduced the concept of Binary Neural Networks: +- **XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks**: https://arxiv.org/abs/1603.05279. +- **Binarized Neural Networks** :https://papers.nips.cc/paper/6573-binarized-neural-networks +- **DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients** :https://arxiv.org/abs/1606.06160 + + + +The project is organized as follows: + + - **models** folder contains CNN models (simple mlp, Network-in-Network, LeNet5, etc.) + - **classifiers/{type}_classifier.py** contains the test and train procedures; where type = {bnn, xnor, dorefa} + - **models/{type}_layers.py** contains the binarylayers implementation (binary activation, binary conv and fully-connected layers, gradient update); where type = {bnn, xnor, dorefa} + - **yml** folder contains configuration files with hyperparameters + - **main.py** represents the entry file + +### Installation + +All packages are in *requirement.txt* +Install the dependencies: + +```sh +pip install -r requirements.txt +``` +### Basic usage +```sh +$ python main.py app:{yml_file} +``` +### Example +Network-in-Network on CIFAR10 dataset. All hyper parameters are in .yml file. +```sh +$ python main.py app:yml/nin_cifar10.yml +``` +## Related Applications +If you find this code useful in your research, please consider citing one of the works in this section. + + - Fast and Accurate Inference on Microcontrollers With Boosted Cooperative Convolutional Neural Networks (BC-Net) https://ieeexplore.ieee.org/abstract/document/9275360 + - CoopNet: Cooperative Convolutional Neural Network for Low-Power MCUs https://ieeexplore.ieee.org/abstract/document/8964993 + - TentacleNet: A Pseudo-Ensemble Template for Accurate Binary Convolutional Neural Networks https://ieeexplore.ieee.org/abstract/document/9073982/ + +License +---- + +MIT + + + + diff --git a/src/models/NN/Type_2/BinaryNet/classifiers/bnn_classifier.py b/src/models/NN/Type_2/BinaryNet/classifiers/bnn_classifier.py index 22f271f..6087dd5 100755 --- a/src/models/NN/Type_2/BinaryNet/classifiers/bnn_classifier.py +++ b/src/models/NN/Type_2/BinaryNet/classifiers/bnn_classifier.py @@ -1,112 +1,112 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -import shutil - - -class BnnClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def top1_accuracy(self): - return top1_accuracy(self.model, self.test_loader, self.device) - - - def train_step(self, criterion, optimizer): - losses = [] - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - optimizer.zero_grad() - loss.backward() - for p in self.model.modules(): - if hasattr(p, 'weight_org'): - p.weight.data.copy_(p.weight_org) - optimizer.step() - for p in self.model.modules(): - if hasattr(p, 'weight_org'): - p.weight_org.data.copy_(p.weight.data.clamp_(-1,1)) - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +import shutil + + +class BnnClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def top1_accuracy(self): + return top1_accuracy(self.model, self.test_loader, self.device) + + + def train_step(self, criterion, optimizer): + losses = [] + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + optimizer.zero_grad() + loss.backward() + for p in self.model.modules(): + if hasattr(p, 'weight_org'): + p.weight.data.copy_(p.weight_org) + optimizer.step() + for p in self.model.modules(): + if hasattr(p, 'weight_org'): + p.weight_org.data.copy_(p.weight.data.clamp_(-1,1)) + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_2/BinaryNet/classifiers/dorefa_classifier.py b/src/models/NN/Type_2/BinaryNet/classifiers/dorefa_classifier.py index 2bb54b9..2f9a667 100755 --- a/src/models/NN/Type_2/BinaryNet/classifiers/dorefa_classifier.py +++ b/src/models/NN/Type_2/BinaryNet/classifiers/dorefa_classifier.py @@ -1,109 +1,109 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -import shutil - -class DorefaClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def train_step(self, criterion, optimizer): - losses = [] - self.model.train() - - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - - - data, target = data.to(self.device), target.to(self.device) - optimizer.zero_grad() - - - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - loss.backward() - - optimizer.step() - - - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +import shutil + +class DorefaClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def train_step(self, criterion, optimizer): + losses = [] + self.model.train() + + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + + + data, target = data.to(self.device), target.to(self.device) + optimizer.zero_grad() + + + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + loss.backward() + + optimizer.step() + + + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_2/BinaryNet/classifiers/xnor_classifier.py b/src/models/NN/Type_2/BinaryNet/classifiers/xnor_classifier.py index 9e133f2..de212d1 100755 --- a/src/models/NN/Type_2/BinaryNet/classifiers/xnor_classifier.py +++ b/src/models/NN/Type_2/BinaryNet/classifiers/xnor_classifier.py @@ -1,128 +1,128 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -from src.models.NN.Type_1.BinaryNet.models.xnor_layers import XNORConv2d -import shutil -from sklearn.metrics import * - -class XnorClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - first = True - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - if first: - - nn_pred = pred[:,0].numpy() - label = target.numpy() - # print(nn_pred, output) - first = False - else: - # print(nn_pred, output) - nn_pred = np.hstack((nn_pred, pred[:,0].numpy())) - label = np.hstack((label, target.numpy())) - - result = classification_report(nn_pred, label, digits=4) - print('\n', result) - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def train_step(self, criterion, optimizer): - losses = [] - self.model.train() - - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - - - data, target = data.to(self.device), target.to(self.device) - optimizer.zero_grad() - - - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - loss.backward() - - for m in self.model.modules(): - if isinstance(m, XNORConv2d): - m.update_gradient() - - optimizer.step() - - - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +from src.models.NN.Type_1.BinaryNet.models.xnor_layers import XNORConv2d +import shutil +from sklearn.metrics import * + +class XnorClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + first = True + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + if first: + + nn_pred = pred[:,0].numpy() + label = target.numpy() + # print(nn_pred, output) + first = False + else: + # print(nn_pred, output) + nn_pred = np.hstack((nn_pred, pred[:,0].numpy())) + label = np.hstack((label, target.numpy())) + + result = classification_report(nn_pred, label, digits=4) + print('\n', result) + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def train_step(self, criterion, optimizer): + losses = [] + self.model.train() + + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + + + data, target = data.to(self.device), target.to(self.device) + optimizer.zero_grad() + + + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + loss.backward() + + for m in self.model.modules(): + if isinstance(m, XNORConv2d): + m.update_gradient() + + optimizer.step() + + + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_2/BinaryNet/config.py b/src/models/NN/Type_2/BinaryNet/config.py index 51c5c8e..dd377fe 100755 --- a/src/models/NN/Type_2/BinaryNet/config.py +++ b/src/models/NN/Type_2/BinaryNet/config.py @@ -1,169 +1,169 @@ -"""config utilities for yml file.""" -import os -import sys -import yaml - -# singletone -FLAGS = None - - -class LoaderMeta(type): - """Constructor for supporting `!include`. - """ - def __new__(mcs, __name__, __bases__, __dict__): - """Add include constructer to class.""" - # register the include constructor on the class - cls = super().__new__(mcs, __name__, __bases__, __dict__) - cls.add_constructor('!include', cls.construct_include) - return cls - - -class Loader(yaml.Loader, metaclass=LoaderMeta): - """YAML Loader with `!include` constructor. - """ - def __init__(self, stream): - try: - self._root = os.path.split(stream.name)[0] - except AttributeError: - self._root = os.path.curdir - super().__init__(stream) - - def construct_include(self, node): - """Include file referenced at node.""" - filename = os.path.abspath( - os.path.join(self._root, self.construct_scalar(node))) - extension = os.path.splitext(filename)[1].lstrip('.') - with open(filename, 'r') as f: - if extension in ('yaml', 'yml'): - return yaml.load(f, Loader) - else: - return ''.join(f.readlines()) - - -class AttrDict(dict): - """Dict as attribute trick. - - """ - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, dict): - self.__dict__[key] = AttrDict(value) - elif isinstance(value, list): - if isinstance(value[0], dict): - self.__dict__[key] = [AttrDict(item) for item in value] - else: - self.__dict__[key] = value - - def yaml(self): - """Convert object to yaml dict and return. - - """ - yaml_dict = {} - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, AttrDict): - yaml_dict[key] = value.yaml() - elif isinstance(value, list): - if isinstance(value[0], AttrDict): - new_l = [] - for item in value: - new_l.append(item.yaml()) - yaml_dict[key] = new_l - else: - yaml_dict[key] = value - else: - yaml_dict[key] = value - return yaml_dict - - def __repr__(self): - """Print all variables. - - """ - ret_str = [] - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, AttrDict): - ret_str.append('{}:'.format(key)) - child_ret_str = value.__repr__().split('\n') - for item in child_ret_str: - ret_str.append(' ' + item) - elif isinstance(value, list): - if isinstance(value[0], AttrDict): - ret_str.append('{}:'.format(key)) - for item in value: - # treat as AttrDict above - child_ret_str = item.__repr__().split('\n') - for item in child_ret_str: - ret_str.append(' ' + item) - else: - ret_str.append('{}: {}'.format(key, value)) - else: - ret_str.append('{}: {}'.format(key, value)) - return '\n'.join(ret_str) - - -class Config(AttrDict): - """Config with yaml file. - - This class is used to config model hyper-parameters, global constants, and - other settings with yaml file. All settings in yaml file will be - automatically logged into file. - - Args: - filename(str): File name. - - Examples: - - yaml file ``model.yml``:: - - NAME: 'neuralgym' - ALPHA: 1.0 - DATASET: '/mnt/data/imagenet' - - Usage in .py: - - >>> from neuralgym import Config - >>> config = Config('model.yml') - >>> print(config.NAME) - neuralgym - >>> print(config.ALPHA) - 1.0 - >>> print(config.DATASET) - /mnt/data/imagenet - - """ - - def __init__(self, filename=None, verbose=False): - assert os.path.exists(filename), 'File {} not exist.'.format(filename) - try: - with open(filename, 'r') as f: - cfg_dict = yaml.load(f, Loader) - except EnvironmentError: - print('Please check the file with name of "%s"', filename) - super(Config, self).__init__(cfg_dict) - if verbose: - print(' pi.cfg '.center(80, '-')) - print(self.__repr__()) - print(''.center(80, '-')) - - -def app(): - """Load app via stdin from subprocess""" - global FLAGS - if FLAGS is None: - job_yaml_file = None - for arg in sys.argv: - if arg.startswith('app:'): - job_yaml_file = arg[4:] - if job_yaml_file is None: - job_yaml_file = sys.stdin.readline() - FLAGS = Config(job_yaml_file) - return FLAGS - else: - return FLAGS - - -app() +"""config utilities for yml file.""" +import os +import sys +import yaml + +# singletone +FLAGS = None + + +class LoaderMeta(type): + """Constructor for supporting `!include`. + """ + def __new__(mcs, __name__, __bases__, __dict__): + """Add include constructer to class.""" + # register the include constructor on the class + cls = super().__new__(mcs, __name__, __bases__, __dict__) + cls.add_constructor('!include', cls.construct_include) + return cls + + +class Loader(yaml.Loader, metaclass=LoaderMeta): + """YAML Loader with `!include` constructor. + """ + def __init__(self, stream): + try: + self._root = os.path.split(stream.name)[0] + except AttributeError: + self._root = os.path.curdir + super().__init__(stream) + + def construct_include(self, node): + """Include file referenced at node.""" + filename = os.path.abspath( + os.path.join(self._root, self.construct_scalar(node))) + extension = os.path.splitext(filename)[1].lstrip('.') + with open(filename, 'r') as f: + if extension in ('yaml', 'yml'): + return yaml.load(f, Loader) + else: + return ''.join(f.readlines()) + + +class AttrDict(dict): + """Dict as attribute trick. + + """ + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, dict): + self.__dict__[key] = AttrDict(value) + elif isinstance(value, list): + if isinstance(value[0], dict): + self.__dict__[key] = [AttrDict(item) for item in value] + else: + self.__dict__[key] = value + + def yaml(self): + """Convert object to yaml dict and return. + + """ + yaml_dict = {} + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, AttrDict): + yaml_dict[key] = value.yaml() + elif isinstance(value, list): + if isinstance(value[0], AttrDict): + new_l = [] + for item in value: + new_l.append(item.yaml()) + yaml_dict[key] = new_l + else: + yaml_dict[key] = value + else: + yaml_dict[key] = value + return yaml_dict + + def __repr__(self): + """Print all variables. + + """ + ret_str = [] + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, AttrDict): + ret_str.append('{}:'.format(key)) + child_ret_str = value.__repr__().split('\n') + for item in child_ret_str: + ret_str.append(' ' + item) + elif isinstance(value, list): + if isinstance(value[0], AttrDict): + ret_str.append('{}:'.format(key)) + for item in value: + # treat as AttrDict above + child_ret_str = item.__repr__().split('\n') + for item in child_ret_str: + ret_str.append(' ' + item) + else: + ret_str.append('{}: {}'.format(key, value)) + else: + ret_str.append('{}: {}'.format(key, value)) + return '\n'.join(ret_str) + + +class Config(AttrDict): + """Config with yaml file. + + This class is used to config model hyper-parameters, global constants, and + other settings with yaml file. All settings in yaml file will be + automatically logged into file. + + Args: + filename(str): File name. + + Examples: + + yaml file ``model.yml``:: + + NAME: 'neuralgym' + ALPHA: 1.0 + DATASET: '/mnt/data/imagenet' + + Usage in .py: + + >>> from neuralgym import Config + >>> config = Config('model.yml') + >>> print(config.NAME) + neuralgym + >>> print(config.ALPHA) + 1.0 + >>> print(config.DATASET) + /mnt/data/imagenet + + """ + + def __init__(self, filename=None, verbose=False): + assert os.path.exists(filename), 'File {} not exist.'.format(filename) + try: + with open(filename, 'r') as f: + cfg_dict = yaml.load(f, Loader) + except EnvironmentError: + print('Please check the file with name of "%s"', filename) + super(Config, self).__init__(cfg_dict) + if verbose: + print(' pi.cfg '.center(80, '-')) + print(self.__repr__()) + print(''.center(80, '-')) + + +def app(): + """Load app via stdin from subprocess""" + global FLAGS + if FLAGS is None: + job_yaml_file = None + for arg in sys.argv: + if arg.startswith('app:'): + job_yaml_file = arg[4:] + if job_yaml_file is None: + job_yaml_file = sys.stdin.readline() + FLAGS = Config(job_yaml_file) + return FLAGS + else: + return FLAGS + + +app() diff --git a/src/models/NN/Type_2/BinaryNet/dataloader/__init__.py b/src/models/NN/Type_2/BinaryNet/dataloader/__init__.py index 788a17b..d210291 100755 --- a/src/models/NN/Type_2/BinaryNet/dataloader/__init__.py +++ b/src/models/NN/Type_2/BinaryNet/dataloader/__init__.py @@ -1,2 +1,2 @@ -from .cifar10 import * -from .mnist import * +from .cifar10 import * +from .mnist import * diff --git a/src/models/NN/Type_2/BinaryNet/dataloader/cifar10.py b/src/models/NN/Type_2/BinaryNet/dataloader/cifar10.py index ec1333a..9c18a8e 100755 --- a/src/models/NN/Type_2/BinaryNet/dataloader/cifar10.py +++ b/src/models/NN/Type_2/BinaryNet/dataloader/cifar10.py @@ -1,39 +1,39 @@ -import os -import torch -from torchvision.datasets import CIFAR10 -import torchvision.transforms as tvt - - -def load_train_data(batch_size=64, sampler=None): - transform = tvt.Compose([ - tvt.RandomCrop(32, padding=4), - tvt.RandomHorizontalFlip(), - tvt.ToTensor(), - tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - - if sampler is None: - shuffle = True - else: - shuffle = False - - dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=True, - download=True, transform=transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, - shuffle=shuffle, sampler=sampler, num_workers=4, pin_memory=True) - - return loader - - -def load_test_data(batch_size=1000, sampler=None): - transform = tvt.Compose([ - tvt.ToTensor(), - tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - - dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=False, - download=True, transform=transform) - loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, - shuffle=False, sampler=sampler, num_workers=4, pin_memory=True) - - return loader +import os +import torch +from torchvision.datasets import CIFAR10 +import torchvision.transforms as tvt + + +def load_train_data(batch_size=64, sampler=None): + transform = tvt.Compose([ + tvt.RandomCrop(32, padding=4), + tvt.RandomHorizontalFlip(), + tvt.ToTensor(), + tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + + if sampler is None: + shuffle = True + else: + shuffle = False + + dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=True, + download=True, transform=transform) + loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, + shuffle=shuffle, sampler=sampler, num_workers=4, pin_memory=True) + + return loader + + +def load_test_data(batch_size=1000, sampler=None): + transform = tvt.Compose([ + tvt.ToTensor(), + tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + + dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=False, + download=True, transform=transform) + loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, + shuffle=False, sampler=sampler, num_workers=4, pin_memory=True) + + return loader diff --git a/src/models/NN/Type_2/BinaryNet/dataloader/mnist.py b/src/models/NN/Type_2/BinaryNet/dataloader/mnist.py index 7dcf662..86f69ad 100755 --- a/src/models/NN/Type_2/BinaryNet/dataloader/mnist.py +++ b/src/models/NN/Type_2/BinaryNet/dataloader/mnist.py @@ -1,38 +1,38 @@ -from torch.utils.data import DataLoader -from os.path import join -from torchvision.datasets import MNIST -from torchvision.transforms import Compose, Resize, Normalize, ToTensor - - -def load_train_data(batch_size=128, sampler=None): - cuda = True - loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} - - train_loader = DataLoader( - MNIST(join('datasets', 'mnist'), train=True, download=True, - transform=Compose([ - Resize((28, 28)), - ToTensor(), - Normalize((0.1307,),(0.308,)), - ])), - batch_size=batch_size, shuffle=True, **loader_kwargs) - - return train_loader - -def load_test_data(batch_size=1000, sampler=None): - - cuda = True - loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} - - test_loader = DataLoader( - MNIST(join('datasets', 'mnist'), train=False, download=True, - transform=Compose([ - Resize((28, 28)), - ToTensor(), - Normalize((0.1307,),(0.308,)), - ])), - batch_size= batch_size, shuffle=False,sampler=sampler, **loader_kwargs) - - return test_loader - - +from torch.utils.data import DataLoader +from os.path import join +from torchvision.datasets import MNIST +from torchvision.transforms import Compose, Resize, Normalize, ToTensor + + +def load_train_data(batch_size=128, sampler=None): + cuda = True + loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} + + train_loader = DataLoader( + MNIST(join('datasets', 'mnist'), train=True, download=True, + transform=Compose([ + Resize((28, 28)), + ToTensor(), + Normalize((0.1307,),(0.308,)), + ])), + batch_size=batch_size, shuffle=True, **loader_kwargs) + + return train_loader + +def load_test_data(batch_size=1000, sampler=None): + + cuda = True + loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} + + test_loader = DataLoader( + MNIST(join('datasets', 'mnist'), train=False, download=True, + transform=Compose([ + Resize((28, 28)), + ToTensor(), + Normalize((0.1307,),(0.308,)), + ])), + batch_size= batch_size, shuffle=False,sampler=sampler, **loader_kwargs) + + return test_loader + + diff --git a/src/models/NN/Type_2/BinaryNet/main.py b/src/models/NN/Type_2/BinaryNet/main.py index a174316..cf8da5c 100755 --- a/src/models/NN/Type_2/BinaryNet/main.py +++ b/src/models/NN/Type_2/BinaryNet/main.py @@ -1,49 +1,49 @@ -import torch -from classifiers.xnor_classifier import * -from classifiers.dorefa_classifier import * -from classifiers.bnn_classifier import * -from config import FLAGS -import importlib -from models import * - -cuda = torch.cuda.is_available() and not(FLAGS.no_cuda) -device = torch.device('cuda' if cuda else 'cpu') -torch.manual_seed(0) -if cuda: - torch.backends.cudnn.deterministic=True - torch.cuda.manual_seed(0) - -dataset = importlib.import_module("dataloader.{}".format(FLAGS.dataset)) -train_loader = dataset.load_train_data(FLAGS.batch_size) -test_loader = dataset.load_test_data(FLAGS.test_batch_size) - -model = eval(FLAGS.model)() -model.to(device) - - -if FLAGS.bin_type == 'xnor': - classification = XnorClassifier(model, train_loader, test_loader, device) - -elif FLAGS.bin_type == 'bnn': - classification = BnnClassifier(model, train_loader, test_loader, device) - -elif FLAGS.bin_type == 'dorefa': - classification = DorefaClassifier(model, train_loader, test_loader, device) - -criterion = torch.nn.CrossEntropyLoss() -criterion.to(device) - -if hasattr(model, 'init_w'): - model.init_w() - - -if FLAGS.optimizer == 'adam': - optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr, weight_decay=1e-5) -elif FLAGS.optimizer == 'sgd': - optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0.9, - weight_decay=5.e-4) - -scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, FLAGS.steps, - gamma=FLAGS.gamma) - -classification.train(criterion, optimizer, FLAGS.epochs, scheduler, FLAGS.checkpoint) +import torch +from classifiers.xnor_classifier import * +from classifiers.dorefa_classifier import * +from classifiers.bnn_classifier import * +from config import FLAGS +import importlib +from models import * + +cuda = torch.cuda.is_available() and not(FLAGS.no_cuda) +device = torch.device('cuda' if cuda else 'cpu') +torch.manual_seed(0) +if cuda: + torch.backends.cudnn.deterministic=True + torch.cuda.manual_seed(0) + +dataset = importlib.import_module("dataloader.{}".format(FLAGS.dataset)) +train_loader = dataset.load_train_data(FLAGS.batch_size) +test_loader = dataset.load_test_data(FLAGS.test_batch_size) + +model = eval(FLAGS.model)() +model.to(device) + + +if FLAGS.bin_type == 'xnor': + classification = XnorClassifier(model, train_loader, test_loader, device) + +elif FLAGS.bin_type == 'bnn': + classification = BnnClassifier(model, train_loader, test_loader, device) + +elif FLAGS.bin_type == 'dorefa': + classification = DorefaClassifier(model, train_loader, test_loader, device) + +criterion = torch.nn.CrossEntropyLoss() +criterion.to(device) + +if hasattr(model, 'init_w'): + model.init_w() + + +if FLAGS.optimizer == 'adam': + optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr, weight_decay=1e-5) +elif FLAGS.optimizer == 'sgd': + optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0.9, + weight_decay=5.e-4) + +scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, FLAGS.steps, + gamma=FLAGS.gamma) + +classification.train(criterion, optimizer, FLAGS.epochs, scheduler, FLAGS.checkpoint) diff --git a/src/models/NN/Type_2/BinaryNet/models/__init__.py b/src/models/NN/Type_2/BinaryNet/models/__init__.py index 20eb788..923977d 100755 --- a/src/models/NN/Type_2/BinaryNet/models/__init__.py +++ b/src/models/NN/Type_2/BinaryNet/models/__init__.py @@ -1,5 +1,5 @@ -from .xnor_nin import * -from .xnor_lenet import * -from .xnor_mlp import * -from .dorefa_resnet import * -from .bnn_caffenet import * +from .xnor_nin import * +from .xnor_lenet import * +from .xnor_mlp import * +from .dorefa_resnet import * +from .bnn_caffenet import * diff --git a/src/models/NN/Type_2/BinaryNet/models/bnn_caffenet.py b/src/models/NN/Type_2/BinaryNet/models/bnn_caffenet.py index c215779..341f43c 100755 --- a/src/models/NN/Type_2/BinaryNet/models/bnn_caffenet.py +++ b/src/models/NN/Type_2/BinaryNet/models/bnn_caffenet.py @@ -1,61 +1,61 @@ -import torch.nn as nn -from .bnn_layers import * - - -__all__ = ['bnn_caffenet'] - - - -class BNNCaffenet(nn.Module): - - def __init__(self, num_classes=10): - super(BNNCaffenet, self).__init__() - - self.features = nn.Sequential( - - BNNConv2d(3, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - nn.Flatten(), - nn.BatchNorm1d(512), - nn.Hardtanh(inplace=True), - BNNLinear(512, num_classes), - nn.BatchNorm1d(num_classes, affine=False), - nn.LogSoftmax(dim=1), - ) - - def forward(self, x): - return self.features(x) - - - def init_w(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - return - - -def bnn_caffenet(num_classes=10): - return BNNCaffenet(num_classes) - +import torch.nn as nn +from .bnn_layers import * + + +__all__ = ['bnn_caffenet'] + + + +class BNNCaffenet(nn.Module): + + def __init__(self, num_classes=10): + super(BNNCaffenet, self).__init__() + + self.features = nn.Sequential( + + BNNConv2d(3, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + nn.Flatten(), + nn.BatchNorm1d(512), + nn.Hardtanh(inplace=True), + BNNLinear(512, num_classes), + nn.BatchNorm1d(num_classes, affine=False), + nn.LogSoftmax(dim=1), + ) + + def forward(self, x): + return self.features(x) + + + def init_w(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + return + + +def bnn_caffenet(num_classes=10): + return BNNCaffenet(num_classes) + diff --git a/src/models/NN/Type_2/BinaryNet/models/bnn_layers.py b/src/models/NN/Type_2/BinaryNet/models/bnn_layers.py index 14a3879..760e124 100755 --- a/src/models/NN/Type_2/BinaryNet/models/bnn_layers.py +++ b/src/models/NN/Type_2/BinaryNet/models/bnn_layers.py @@ -1,62 +1,62 @@ -import torch -from torch.nn import Module, Conv2d, Linear -from torch.nn.functional import linear, conv2d - - -__all__ = ['BNNLinear', 'BNNConv2d'] - - - - -def Binarize(tensor,quant_mode='det'): - if quant_mode=='det': - return tensor.sign() - if quant_mode=='bin': - return (tensor>=0).type(type(tensor))*2-1 - else: - return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) - - -class BNNLinear(Linear): - - def __init__(self, *kargs, **kwargs): - super(BNNLinear, self).__init__(*kargs, **kwargs) - self.register_buffer('weight_org', self.weight.data.clone()) - - def forward(self, input): - - if (input.size(1) != 784) and (input.size(1) != 3072): - input.data=Binarize(input.data) - - self.weight.data=Binarize(self.weight_org) - out = linear(input, self.weight) - - if not self.bias is None: - self.bias.org=self.bias.data.clone() - out += self.bias.view(1, -1).expand_as(out) - - return out - - -class BNNConv2d(Conv2d): - - def __init__(self, *kargs, **kwargs): - super(BNNConv2d, self).__init__(*kargs, **kwargs) - self.register_buffer('weight_org', self.weight.data.clone()) - - def forward(self, input): - if input.size(1) != 3: - input.data = Binarize(input.data) - - self.weight.data=Binarize(self.weight_org) - - - out = conv2d(input, self.weight, None, self.stride, - self.padding, self.dilation, self.groups) - - if not self.bias is None: - self.bias.org=self.bias.data.clone() - out += self.bias.view(1, -1, 1, 1).expand_as(out) - - return out - +import torch +from torch.nn import Module, Conv2d, Linear +from torch.nn.functional import linear, conv2d + + +__all__ = ['BNNLinear', 'BNNConv2d'] + + + + +def Binarize(tensor,quant_mode='det'): + if quant_mode=='det': + return tensor.sign() + if quant_mode=='bin': + return (tensor>=0).type(type(tensor))*2-1 + else: + return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) + + +class BNNLinear(Linear): + + def __init__(self, *kargs, **kwargs): + super(BNNLinear, self).__init__(*kargs, **kwargs) + self.register_buffer('weight_org', self.weight.data.clone()) + + def forward(self, input): + + if (input.size(1) != 784) and (input.size(1) != 3072): + input.data=Binarize(input.data) + + self.weight.data=Binarize(self.weight_org) + out = linear(input, self.weight) + + if not self.bias is None: + self.bias.org=self.bias.data.clone() + out += self.bias.view(1, -1).expand_as(out) + + return out + + +class BNNConv2d(Conv2d): + + def __init__(self, *kargs, **kwargs): + super(BNNConv2d, self).__init__(*kargs, **kwargs) + self.register_buffer('weight_org', self.weight.data.clone()) + + def forward(self, input): + if input.size(1) != 3: + input.data = Binarize(input.data) + + self.weight.data=Binarize(self.weight_org) + + + out = conv2d(input, self.weight, None, self.stride, + self.padding, self.dilation, self.groups) + + if not self.bias is None: + self.bias.org=self.bias.data.clone() + out += self.bias.view(1, -1, 1, 1).expand_as(out) + + return out + diff --git a/src/models/NN/Type_2/BinaryNet/models/dorefa_layers.py b/src/models/NN/Type_2/BinaryNet/models/dorefa_layers.py index b1dad33..9388b5b 100755 --- a/src/models/NN/Type_2/BinaryNet/models/dorefa_layers.py +++ b/src/models/NN/Type_2/BinaryNet/models/dorefa_layers.py @@ -1,110 +1,110 @@ -import torch -import numpy as np -from torch.autograd import Function -from torch.nn import Conv2d, Linear -from torch.nn.functional import linear, conv2d - -__all__ = ['DOREFAConv2d','DOREFALinear'] - - -class ScaleSigner(Function): - """take a real value x, output sign(x)*E(|x|)""" - @staticmethod - def forward(ctx, input): - return torch.sign(input) * torch.mean(torch.abs(input)) - - @staticmethod - def backward(ctx, grad_output): - return grad_output - - -def scale_sign(input): - return ScaleSigner.apply(input) - - -class Quantizer(Function): - @staticmethod - def forward(ctx, input, nbit): - scale = 2 ** nbit - 1 - return torch.round(input * scale) / scale - - @staticmethod - def backward(ctx, grad_output): - return grad_output, None - - -def quantize(input, nbit): - return Quantizer.apply(input, nbit) - - -def dorefa_w(w, nbit_w): - if nbit_w == 1: - w = scale_sign(w) - else: - w = torch.tanh(w) - w = w / (2 * torch.max(torch.abs(w))) + 0.5 - w = 2 * quantize(w, nbit_w) - 1 - - return w - - -def dorefa_a(input, nbit_a): - return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) - - -class DOREFAConv2d(Conv2d): - """docstring for QuanConv""" - def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, - nbit_a=1, stride=1, - padding=0, dilation=1, groups=1, - bias=True): - super(DOREFAConv2d, self).__init__( - in_channels, out_channels, kernel_size, stride, padding, dilation, - groups, bias) - self.nbit_w = nbit_w - self.nbit_a = nbit_a - name_w_dict = {'dorefa': dorefa_w} - name_a_dict = {'dorefa': dorefa_a} - self.quan_w = name_w_dict[quan_name_w] - self.quan_a = name_a_dict[quan_name_a] - - def forward(self, input): - if self.nbit_w < 32: - w = self.quan_w(self.weight, self.nbit_w) - else: - w = self.weight - - if self.nbit_a < 32: - x = self.quan_a(input, self.nbit_a) - else: - x = input - - output = conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) - - return output - -class DOREFALinear(Linear): - def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, nbit_a=1): - super(DOREFALinear, self).__init__(in_features, out_features, bias) - self.nbit_w = nbit_w - self.nbit_a = nbit_a - name_w_dict = {'dorefa': dorefa_w} - name_a_dict = {'dorefa': dorefa_a} - self.quan_w = name_w_dict[quan_name_w] - self.quan_a = name_a_dict[quan_name_a] - - def forward(self, input): - if self.nbit_w < 32: - w = self.quan_w(self.weight, self.nbit_w) - else: - w = self.weight - - if self.nbit_a < 32: - x = self.quan_a(input, self.nbit_a) - else: - x = input - - - output = linear(x, w, self.bias) - - return output +import torch +import numpy as np +from torch.autograd import Function +from torch.nn import Conv2d, Linear +from torch.nn.functional import linear, conv2d + +__all__ = ['DOREFAConv2d','DOREFALinear'] + + +class ScaleSigner(Function): + """take a real value x, output sign(x)*E(|x|)""" + @staticmethod + def forward(ctx, input): + return torch.sign(input) * torch.mean(torch.abs(input)) + + @staticmethod + def backward(ctx, grad_output): + return grad_output + + +def scale_sign(input): + return ScaleSigner.apply(input) + + +class Quantizer(Function): + @staticmethod + def forward(ctx, input, nbit): + scale = 2 ** nbit - 1 + return torch.round(input * scale) / scale + + @staticmethod + def backward(ctx, grad_output): + return grad_output, None + + +def quantize(input, nbit): + return Quantizer.apply(input, nbit) + + +def dorefa_w(w, nbit_w): + if nbit_w == 1: + w = scale_sign(w) + else: + w = torch.tanh(w) + w = w / (2 * torch.max(torch.abs(w))) + 0.5 + w = 2 * quantize(w, nbit_w) - 1 + + return w + + +def dorefa_a(input, nbit_a): + return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) + + +class DOREFAConv2d(Conv2d): + """docstring for QuanConv""" + def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, + nbit_a=1, stride=1, + padding=0, dilation=1, groups=1, + bias=True): + super(DOREFAConv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias) + self.nbit_w = nbit_w + self.nbit_a = nbit_a + name_w_dict = {'dorefa': dorefa_w} + name_a_dict = {'dorefa': dorefa_a} + self.quan_w = name_w_dict[quan_name_w] + self.quan_a = name_a_dict[quan_name_a] + + def forward(self, input): + if self.nbit_w < 32: + w = self.quan_w(self.weight, self.nbit_w) + else: + w = self.weight + + if self.nbit_a < 32: + x = self.quan_a(input, self.nbit_a) + else: + x = input + + output = conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) + + return output + +class DOREFALinear(Linear): + def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, nbit_a=1): + super(DOREFALinear, self).__init__(in_features, out_features, bias) + self.nbit_w = nbit_w + self.nbit_a = nbit_a + name_w_dict = {'dorefa': dorefa_w} + name_a_dict = {'dorefa': dorefa_a} + self.quan_w = name_w_dict[quan_name_w] + self.quan_a = name_a_dict[quan_name_a] + + def forward(self, input): + if self.nbit_w < 32: + w = self.quan_w(self.weight, self.nbit_w) + else: + w = self.weight + + if self.nbit_a < 32: + x = self.quan_a(input, self.nbit_a) + else: + x = input + + + output = linear(x, w, self.bias) + + return output diff --git a/src/models/NN/Type_2/BinaryNet/models/dorefa_resnet.py b/src/models/NN/Type_2/BinaryNet/models/dorefa_resnet.py index 92ebbb9..3f426d6 100755 --- a/src/models/NN/Type_2/BinaryNet/models/dorefa_resnet.py +++ b/src/models/NN/Type_2/BinaryNet/models/dorefa_resnet.py @@ -1,154 +1,154 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .dorefa_layers import DOREFAConv2d as Conv -from .dorefa_layers import DOREFALinear as Linear - -__all__ = ['dorefa_resnet18'] - - -def conv3x3(in_planes, out_planes, wbit, abit, stride=1): - """3x3 convolution with padding""" - return Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, nbit_w=wbit, nbit_a=abit) - - -def conv1x1(in_planes, out_planes, wbit, abit, stride=1): - """1x1 convolution""" - return Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, nbit_w=wbit, nbit_a=abit) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, wbit, abit, sparsity_list, stride=1): - super(BasicBlock, self).__init__() - - self.bb = nn.Sequential( - conv3x3(in_planes, planes, wbit=wbit, abit=abit, stride=stride), - nn.BatchNorm2d(planes), - nn.ReLU(inplace=True), - conv3x3(planes, planes, wbit=wbit, abit=abit, stride=1), - nn.BatchNorm2d(planes), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - conv1x1(in_planes, self.expansion*planes, wbit=wbit, abit=abit, stride=stride), - nn.BatchNorm2d(self.expansion*planes,sparsity_list) - ) - - def forward(self, x): - out = self.bb(x) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, in_planes, planes, wbit, abit, stride=1): - super(Bottleneck, self).__init__() - self.conv1 = conv1x1(in_planes, planes, wbit=wbit, abit=abit, stride=1) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = conv3x3(planes, planes, wbit=wbit, abit=abit, stride=stride) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = conv1x1(planes, self.expansion*planes,wbit=wbit, abit=abit, stride=1) - self.bn3 = nn.BatchNorm2d(self.expansion*planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - conv1x1(in_planes, self.expansion*planes,wbit=wbit,abit=abit,stride=stride), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - - - - - -class ResNet(nn.Module): - def __init__(self, block, num_blocks, wbit=1, abit=1, num_classes=10): - super(ResNet, self).__init__() - self.in_planes = 64 - - self.head = nn.Sequential( - nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - ) - - self.layer1 = self._make_layer(block, 64, num_blocks[0], wbit=wbit, abit=abit, stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], wbit=wbit, abit=abit, stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], wbit=wbit, abit=abit, stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], wbit=wbit, abit=abit, stride=2) - - self.tail = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Flatten(), - nn.Linear(512*block.expansion, num_classes), - ) - - def init_w(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - return - - def _make_layer(self, block, planes, num_blocks, wbit, abit, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, wbit, abit, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - - out = self.head(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.tail(out) - return out - - - - -def dorefa_resnet18(wbit=1, abit=1): - return ResNet(BasicBlock, [2,2,2,2], wbit=wbit, abit=abit) - -def ResNet34(wbit, abit): - return ResNet(BasicBlock, [3,4,6,3], wbit=wbit, abit=abit) - -def ResNet50(wbit, abit): - return ResNet(Bottleneck, [3,4,6,3], wbit=wbit, abit=abit) - -def ResNet101(wbit, abit): - return ResNet(Bottleneck, [3,4,23,3], wbit=wbit, abit=abit) - -def ResNet152(wbit, abit): - return ResNet(Bottleneck, [3,8,36,3], wbit=wbit, abit=abit) - - +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .dorefa_layers import DOREFAConv2d as Conv +from .dorefa_layers import DOREFALinear as Linear + +__all__ = ['dorefa_resnet18'] + + +def conv3x3(in_planes, out_planes, wbit, abit, stride=1): + """3x3 convolution with padding""" + return Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, nbit_w=wbit, nbit_a=abit) + + +def conv1x1(in_planes, out_planes, wbit, abit, stride=1): + """1x1 convolution""" + return Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, nbit_w=wbit, nbit_a=abit) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, wbit, abit, sparsity_list, stride=1): + super(BasicBlock, self).__init__() + + self.bb = nn.Sequential( + conv3x3(in_planes, planes, wbit=wbit, abit=abit, stride=stride), + nn.BatchNorm2d(planes), + nn.ReLU(inplace=True), + conv3x3(planes, planes, wbit=wbit, abit=abit, stride=1), + nn.BatchNorm2d(planes), + ) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + conv1x1(in_planes, self.expansion*planes, wbit=wbit, abit=abit, stride=stride), + nn.BatchNorm2d(self.expansion*planes,sparsity_list) + ) + + def forward(self, x): + out = self.bb(x) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, wbit, abit, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(in_planes, planes, wbit=wbit, abit=abit, stride=1) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, wbit=wbit, abit=abit, stride=stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, self.expansion*planes,wbit=wbit, abit=abit, stride=1) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + conv1x1(in_planes, self.expansion*planes,wbit=wbit,abit=abit,stride=stride), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + + + + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, wbit=1, abit=1, num_classes=10): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.head = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + ) + + self.layer1 = self._make_layer(block, 64, num_blocks[0], wbit=wbit, abit=abit, stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], wbit=wbit, abit=abit, stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], wbit=wbit, abit=abit, stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], wbit=wbit, abit=abit, stride=2) + + self.tail = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + nn.Linear(512*block.expansion, num_classes), + ) + + def init_w(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + return + + def _make_layer(self, block, planes, num_blocks, wbit, abit, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, wbit, abit, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + + out = self.head(x) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = self.tail(out) + return out + + + + +def dorefa_resnet18(wbit=1, abit=1): + return ResNet(BasicBlock, [2,2,2,2], wbit=wbit, abit=abit) + +def ResNet34(wbit, abit): + return ResNet(BasicBlock, [3,4,6,3], wbit=wbit, abit=abit) + +def ResNet50(wbit, abit): + return ResNet(Bottleneck, [3,4,6,3], wbit=wbit, abit=abit) + +def ResNet101(wbit, abit): + return ResNet(Bottleneck, [3,4,23,3], wbit=wbit, abit=abit) + +def ResNet152(wbit, abit): + return ResNet(Bottleneck, [3,8,36,3], wbit=wbit, abit=abit) + + diff --git a/src/models/NN/Type_2/BinaryNet/models/xnor_layers.py b/src/models/NN/Type_2/BinaryNet/models/xnor_layers.py index 203f100..1318c2b 100755 --- a/src/models/NN/Type_2/BinaryNet/models/xnor_layers.py +++ b/src/models/NN/Type_2/BinaryNet/models/xnor_layers.py @@ -1,145 +1,145 @@ -from torch import zeros -from torch.autograd import Function -from torch.nn import Parameter, Module, Conv2d, Linear, BatchNorm1d, BatchNorm2d, Dropout, ReLU - - -__all__ = ['XNORConv2d', 'XNORLinear', 'BNConvReLU','BNLinearReLU'] - - -class BinActive(Function): - @staticmethod - def forward(ctx, input): - ctx.save_for_backward(input) - input = input.sign() - return input - - @staticmethod - def backward(ctx, grad_output): - input, = ctx.saved_tensors - grad_input = grad_output.clone() - grad_input[input.ge(1)] = 0 - grad_input[input.le(-1)] = 0 - return grad_input - - -class XNORConv2d(Module): - def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): - super(XNORConv2d, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.groups = groups - - self.conv = Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = kernel_size, stride = stride, padding = padding, groups = groups) - self.conv.weight.data.normal_(0, 0.05) - self.conv.bias.data.zero_() - - self.fp_weights = Parameter(zeros(self.conv.weight.size())) - self.fp_weights.data.copy_(self.conv.weight.data) - - def forward(self, x): - - self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) - self.fp_weights.data.clamp_(-1, 1) - self.mean_val = self.fp_weights.abs().view(self.out_channels, -1).mean(1, keepdim=True) - - self.conv.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1, 1, 1)) - x = self.conv(x) - - return x - - def update_gradient(self): - proxy = self.fp_weights.abs().sign() - proxy[self.fp_weights.data.abs()>1] = 0 - binary_grad = self.conv.weight.grad * self.mean_val.view(-1, 1, 1, 1) * proxy - - mean_grad = self.conv.weight.data.sign() * self.conv.weight.grad - mean_grad = mean_grad.view(self.out_channels, -1).mean(1).view(-1, 1, 1, 1) - mean_grad = mean_grad * self.conv.weight.data.sign() - - self.fp_weights.grad = binary_grad + mean_grad - self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) - -class BNConvReLU(Module): - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): - super(BNConvReLU, self).__init__() - self.dropout = dropout_ratio - self.a_active = BinActive.apply - - self.bn = BatchNorm2d(in_channels, eps=1e-4, momentum=0.1, affine=True) - if self.dropout !=0: - self.drop = Dropout(self.dropout, inplace=True) - self.econv = XNORConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias) - self.relu = ReLU(inplace=True) - - def forward(self, x): - - x = self.bn(x) - x = self.a_active(x) - if self.dropout !=0: - x = self.drop(x) - - x = self.econv(x) - x = self.relu(x) - return x - - -class XNORLinear(Module): - def __init__(self, in_features, out_features, bias=True): - super(XNORLinear, self).__init__() - self.in_features = in_features - self.out_features = out_features - self.bias = bias - - self.linear = Linear(in_features = in_features, out_features = out_features, bias = bias) - self.fp_weights = Parameter(zeros(self.linear.weight.size())) - self.fp_weights.data.copy_(self.linear.weight.data) - - def forward(self, x): - self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) - self.fp_weights.data.clamp_(-1, 1) - - self.mean_val = self.fp_weights.abs().view(self.out_features, -1).mean(1, keepdim=True) - - self.linear.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1)) - x = self.linear(x) - return x - - def update_gradient(self): - proxy = self.fp_weights.abs().sign() - proxy[self.fp_weights.data.abs()>1] = 0 - binary_grad = self.linear.weight.grad * self.mean_val.view(-1, 1) * proxy - - mean_grad = self.linear.weight.data.sign() * self.linear.weight.grad - mean_grad = mean_grad.view(self.out_features, -1).mean(1).view(-1, 1) - mean_grad = mean_grad * self.linear.weight.data.sign() - - self.fp_weights.grad = binary_grad + mean_grad - self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) - return - -class BNLinearReLU(Module): - def __init__(self, in_channels, out_channels, bias=True, dropout_ratio=0): - super(BNLinearReLU, self).__init__() - self.dropout = dropout_ratio - self.a_active = BinActive.apply - - self.bn = BatchNorm1d(in_channels, eps=1e-4, momentum=0.1, affine=True) - if self.dropout !=0: - self.drop = Dropout(self.dropout, inplace=True) - self.fc = XNORLinear(in_channels, out_channels, bias=bias) - self.relu = ReLU(inplace=True) - - def forward(self, x): - - x = self.bn(x) - x = self.a_active(x) - if self.dropout !=0: - x = self.drop(x) - - x = self.fc(x) - x = self.relu(x) - return x +from torch import zeros +from torch.autograd import Function +from torch.nn import Parameter, Module, Conv2d, Linear, BatchNorm1d, BatchNorm2d, Dropout, ReLU + + +__all__ = ['XNORConv2d', 'XNORLinear', 'BNConvReLU','BNLinearReLU'] + + +class BinActive(Function): + @staticmethod + def forward(ctx, input): + ctx.save_for_backward(input) + input = input.sign() + return input + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + grad_input = grad_output.clone() + grad_input[input.ge(1)] = 0 + grad_input[input.le(-1)] = 0 + return grad_input + + +class XNORConv2d(Module): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): + super(XNORConv2d, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.groups = groups + + self.conv = Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = kernel_size, stride = stride, padding = padding, groups = groups) + self.conv.weight.data.normal_(0, 0.05) + self.conv.bias.data.zero_() + + self.fp_weights = Parameter(zeros(self.conv.weight.size())) + self.fp_weights.data.copy_(self.conv.weight.data) + + def forward(self, x): + + self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) + self.fp_weights.data.clamp_(-1, 1) + self.mean_val = self.fp_weights.abs().view(self.out_channels, -1).mean(1, keepdim=True) + + self.conv.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1, 1, 1)) + x = self.conv(x) + + return x + + def update_gradient(self): + proxy = self.fp_weights.abs().sign() + proxy[self.fp_weights.data.abs()>1] = 0 + binary_grad = self.conv.weight.grad * self.mean_val.view(-1, 1, 1, 1) * proxy + + mean_grad = self.conv.weight.data.sign() * self.conv.weight.grad + mean_grad = mean_grad.view(self.out_channels, -1).mean(1).view(-1, 1, 1, 1) + mean_grad = mean_grad * self.conv.weight.data.sign() + + self.fp_weights.grad = binary_grad + mean_grad + self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) + +class BNConvReLU(Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): + super(BNConvReLU, self).__init__() + self.dropout = dropout_ratio + self.a_active = BinActive.apply + + self.bn = BatchNorm2d(in_channels, eps=1e-4, momentum=0.1, affine=True) + if self.dropout !=0: + self.drop = Dropout(self.dropout, inplace=True) + self.econv = XNORConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias) + self.relu = ReLU(inplace=True) + + def forward(self, x): + + x = self.bn(x) + x = self.a_active(x) + if self.dropout !=0: + x = self.drop(x) + + x = self.econv(x) + x = self.relu(x) + return x + + +class XNORLinear(Module): + def __init__(self, in_features, out_features, bias=True): + super(XNORLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.bias = bias + + self.linear = Linear(in_features = in_features, out_features = out_features, bias = bias) + self.fp_weights = Parameter(zeros(self.linear.weight.size())) + self.fp_weights.data.copy_(self.linear.weight.data) + + def forward(self, x): + self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) + self.fp_weights.data.clamp_(-1, 1) + + self.mean_val = self.fp_weights.abs().view(self.out_features, -1).mean(1, keepdim=True) + + self.linear.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1)) + x = self.linear(x) + return x + + def update_gradient(self): + proxy = self.fp_weights.abs().sign() + proxy[self.fp_weights.data.abs()>1] = 0 + binary_grad = self.linear.weight.grad * self.mean_val.view(-1, 1) * proxy + + mean_grad = self.linear.weight.data.sign() * self.linear.weight.grad + mean_grad = mean_grad.view(self.out_features, -1).mean(1).view(-1, 1) + mean_grad = mean_grad * self.linear.weight.data.sign() + + self.fp_weights.grad = binary_grad + mean_grad + self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) + return + +class BNLinearReLU(Module): + def __init__(self, in_channels, out_channels, bias=True, dropout_ratio=0): + super(BNLinearReLU, self).__init__() + self.dropout = dropout_ratio + self.a_active = BinActive.apply + + self.bn = BatchNorm1d(in_channels, eps=1e-4, momentum=0.1, affine=True) + if self.dropout !=0: + self.drop = Dropout(self.dropout, inplace=True) + self.fc = XNORLinear(in_channels, out_channels, bias=bias) + self.relu = ReLU(inplace=True) + + def forward(self, x): + + x = self.bn(x) + x = self.a_active(x) + if self.dropout !=0: + x = self.drop(x) + + x = self.fc(x) + x = self.relu(x) + return x diff --git a/src/models/NN/Type_2/BinaryNet/models/xnor_lenet.py b/src/models/NN/Type_2/BinaryNet/models/xnor_lenet.py index 444c1f9..ba2031b 100755 --- a/src/models/NN/Type_2/BinaryNet/models/xnor_lenet.py +++ b/src/models/NN/Type_2/BinaryNet/models/xnor_lenet.py @@ -1,45 +1,45 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['lenet5'] - -class LeNet5(nn.Module): - def __init__(self, out_classes = 10): - super(LeNet5, self).__init__() - self.features = nn.Sequential( - nn.Conv2d(1, 20, kernel_size=5, stride=1), - nn.BatchNorm2d(20, eps=1e-4, momentum=0.1, affine=False), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - XNORConv2d(20, 50, kernel_size=5, stride=1, padding=0), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Flatten(), - ) - self.classifier = nn.Sequential( - BNLinearReLU(800, 500), - nn.BatchNorm1d(500, eps=1e-4, momentum=0.1, affine=False), - nn.Linear(500, out_classes), - ) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.features(x) - x = self.classifier(x) - return x - -def lenet5(out_classes=10): - return LeNet5(out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['lenet5'] + +class LeNet5(nn.Module): + def __init__(self, out_classes = 10): + super(LeNet5, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(1, 20, kernel_size=5, stride=1), + nn.BatchNorm2d(20, eps=1e-4, momentum=0.1, affine=False), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + XNORConv2d(20, 50, kernel_size=5, stride=1, padding=0), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Flatten(), + ) + self.classifier = nn.Sequential( + BNLinearReLU(800, 500), + nn.BatchNorm1d(500, eps=1e-4, momentum=0.1, affine=False), + nn.Linear(500, out_classes), + ) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.features(x) + x = self.classifier(x) + return x + +def lenet5(out_classes=10): + return LeNet5(out_classes) diff --git a/src/models/NN/Type_2/BinaryNet/models/xnor_mlp.py b/src/models/NN/Type_2/BinaryNet/models/xnor_mlp.py index b8768e6..b4dc26a 100755 --- a/src/models/NN/Type_2/BinaryNet/models/xnor_mlp.py +++ b/src/models/NN/Type_2/BinaryNet/models/xnor_mlp.py @@ -1,44 +1,44 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['mlp'] - -class MLP(nn.Module): - def __init__(self, input_size, num_hidden_nodes, num_layers, out_classes ): - super(MLP, self).__init__() - self.num_layers = num_layers - self.classifier = nn.Sequential() - for l in range(num_layers): - if l==0: - self.classifier.add_module('layer'+str(l)+'_flatten', nn.Flatten()) - self.classifier.add_module('layer'+str(l), nn.Linear(input_size, num_hidden_nodes[l])) - self.classifier.add_module('layer'+str(l)+'_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) - self.classifier.add_module('layer'+str(l)+'_activate', nn.ReLU(inplace=True)) - elif l+1 == num_layers: - self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], out_classes)) - else: - self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], num_hidden_nodes[l])) - self.classifier.add_module('layer' + str(l) + '_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) - self.classifier.add_module('layer' + str(l) + '_activate', nn.ReLU(inplace=True)) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.classifier(x) - return x - -def mlp(input_size, num_hidden_nodes, num_layers, out_classes): - return MLP(input_size, num_hidden_nodes, num_layers, out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['mlp'] + +class MLP(nn.Module): + def __init__(self, input_size, num_hidden_nodes, num_layers, out_classes ): + super(MLP, self).__init__() + self.num_layers = num_layers + self.classifier = nn.Sequential() + for l in range(num_layers): + if l==0: + self.classifier.add_module('layer'+str(l)+'_flatten', nn.Flatten()) + self.classifier.add_module('layer'+str(l), nn.Linear(input_size, num_hidden_nodes[l])) + self.classifier.add_module('layer'+str(l)+'_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) + self.classifier.add_module('layer'+str(l)+'_activate', nn.ReLU(inplace=True)) + elif l+1 == num_layers: + self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], out_classes)) + else: + self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], num_hidden_nodes[l])) + self.classifier.add_module('layer' + str(l) + '_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) + self.classifier.add_module('layer' + str(l) + '_activate', nn.ReLU(inplace=True)) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.classifier(x) + return x + +def mlp(input_size, num_hidden_nodes, num_layers, out_classes): + return MLP(input_size, num_hidden_nodes, num_layers, out_classes) diff --git a/src/models/NN/Type_2/BinaryNet/models/xnor_nin.py b/src/models/NN/Type_2/BinaryNet/models/xnor_nin.py index 146cab5..952ca07 100755 --- a/src/models/NN/Type_2/BinaryNet/models/xnor_nin.py +++ b/src/models/NN/Type_2/BinaryNet/models/xnor_nin.py @@ -1,55 +1,55 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['nin'] - -class NIN(nn.Module): - def __init__(self, out_class=10): - super(NIN, self).__init__() - - - self.features = nn.Sequential( - nn.Conv2d(3, 192, kernel_size = 5, stride = 1, padding = 2), - nn.BatchNorm2d(192, eps=1e-4, momentum = 0.1, affine = False), - nn.ReLU(inplace=True), - - BNConvReLU(192, 160, kernel_size=1, stride=1, padding=0), - BNConvReLU(160, 96, kernel_size=1, stride=1, padding=0), - nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), - - BNConvReLU(96, 192, kernel_size=5, stride=1, padding=2, dropout_ratio=0.5), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - nn.AvgPool2d(kernel_size = 3, stride = 2, padding = 1), - - BNConvReLU(192, 192, kernel_size=3, stride=1, padding=1, dropout_ratio=0.5), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - - nn.BatchNorm2d(192, eps = 1e-4, momentum = 0.1, affine = False), - nn.Conv2d(192, out_class, kernel_size = 1, stride = 1, padding = 0), - nn.ReLU(inplace=True), - nn.AdaptiveAvgPool2d(1), - nn.Flatten() - ) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.features(x) - return x - -def nin(out_classes=10): - return NIN(out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['nin'] + +class NIN(nn.Module): + def __init__(self, out_class=10): + super(NIN, self).__init__() + + + self.features = nn.Sequential( + nn.Conv2d(3, 192, kernel_size = 5, stride = 1, padding = 2), + nn.BatchNorm2d(192, eps=1e-4, momentum = 0.1, affine = False), + nn.ReLU(inplace=True), + + BNConvReLU(192, 160, kernel_size=1, stride=1, padding=0), + BNConvReLU(160, 96, kernel_size=1, stride=1, padding=0), + nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), + + BNConvReLU(96, 192, kernel_size=5, stride=1, padding=2, dropout_ratio=0.5), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + nn.AvgPool2d(kernel_size = 3, stride = 2, padding = 1), + + BNConvReLU(192, 192, kernel_size=3, stride=1, padding=1, dropout_ratio=0.5), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + + nn.BatchNorm2d(192, eps = 1e-4, momentum = 0.1, affine = False), + nn.Conv2d(192, out_class, kernel_size = 1, stride = 1, padding = 0), + nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d(1), + nn.Flatten() + ) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.features(x) + return x + +def nin(out_classes=10): + return NIN(out_classes) diff --git a/src/models/NN/Type_2/BinaryNet/requirements.txt b/src/models/NN/Type_2/BinaryNet/requirements.txt index dc3af0c..40ebca7 100755 --- a/src/models/NN/Type_2/BinaryNet/requirements.txt +++ b/src/models/NN/Type_2/BinaryNet/requirements.txt @@ -1,5 +1,5 @@ -torch -torchvision -tqdm -pyyaml - +torch +torchvision +tqdm +pyyaml + diff --git a/src/models/NN/Type_2/BinaryNet/yml/bnn_caffenet_cifar10.yml b/src/models/NN/Type_2/BinaryNet/yml/bnn_caffenet_cifar10.yml index 99fab9d..2c352c2 100755 --- a/src/models/NN/Type_2/BinaryNet/yml/bnn_caffenet_cifar10.yml +++ b/src/models/NN/Type_2/BinaryNet/yml/bnn_caffenet_cifar10.yml @@ -1,17 +1,17 @@ -no_cuda: False -checkpoint: "results/bnn_caffenet_cifar10" -filename: null -pretrained: null -bin_type: 'bnn' - -model : "bnn_caffenet" -save_path: "results/bnn_caffenet_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'sgd' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/bnn_caffenet_cifar10" +filename: null +pretrained: null +bin_type: 'bnn' + +model : "bnn_caffenet" +save_path: "results/bnn_caffenet_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'sgd' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_2/BinaryNet/yml/dorefa_resnet_cifar10.yml b/src/models/NN/Type_2/BinaryNet/yml/dorefa_resnet_cifar10.yml index dffecfb..29f5c73 100755 --- a/src/models/NN/Type_2/BinaryNet/yml/dorefa_resnet_cifar10.yml +++ b/src/models/NN/Type_2/BinaryNet/yml/dorefa_resnet_cifar10.yml @@ -1,18 +1,18 @@ -no_cuda: False -checkpoint: "results/dorefa_resnet_cifar10" -filename: null -pretrained: null - -bin_type: 'dorefa' - -model : "dorefa_resnet18" -save_path: "results/dorefa_resnet_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'sgd' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/dorefa_resnet_cifar10" +filename: null +pretrained: null + +bin_type: 'dorefa' + +model : "dorefa_resnet18" +save_path: "results/dorefa_resnet_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'sgd' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_2/BinaryNet/yml/lenet_mnist.yml b/src/models/NN/Type_2/BinaryNet/yml/lenet_mnist.yml index 9226466..5eb5c0b 100755 --- a/src/models/NN/Type_2/BinaryNet/yml/lenet_mnist.yml +++ b/src/models/NN/Type_2/BinaryNet/yml/lenet_mnist.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/lenet_mnist" -filename: null -pretrained: null -bin_type: "xnor" -model : "lenet5" -save_path: "results/lenet_mnist" -dataset : "mnist" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [100, 200] -epochs: 300 - +no_cuda: False +checkpoint: "results/lenet_mnist" +filename: null +pretrained: null +bin_type: "xnor" +model : "lenet5" +save_path: "results/lenet_mnist" +dataset : "mnist" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [100, 200] +epochs: 300 + diff --git a/src/models/NN/Type_2/BinaryNet/yml/mlp_mnist.yml b/src/models/NN/Type_2/BinaryNet/yml/mlp_mnist.yml index 98511c9..94afe32 100755 --- a/src/models/NN/Type_2/BinaryNet/yml/mlp_mnist.yml +++ b/src/models/NN/Type_2/BinaryNet/yml/mlp_mnist.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/mlp_mnist" -filename: null -pretrained: null -bin_type: "xnor" -model : "mlp" -save_path: "results/mlp_mnist" -dataset : "mnist" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [100, 200] -epochs: 300 - +no_cuda: False +checkpoint: "results/mlp_mnist" +filename: null +pretrained: null +bin_type: "xnor" +model : "mlp" +save_path: "results/mlp_mnist" +dataset : "mnist" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [100, 200] +epochs: 300 + diff --git a/src/models/NN/Type_2/BinaryNet/yml/nin_cifar10.yml b/src/models/NN/Type_2/BinaryNet/yml/nin_cifar10.yml index dae28c6..c45127c 100755 --- a/src/models/NN/Type_2/BinaryNet/yml/nin_cifar10.yml +++ b/src/models/NN/Type_2/BinaryNet/yml/nin_cifar10.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/nin_cifar10" -filename: null -pretrained: null -bin_type: 'xnor' -model : "nin" -save_path: "results/nin_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/nin_cifar10" +filename: null +pretrained: null +bin_type: 'xnor' +model : "nin" +save_path: "results/nin_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_2/dedicated_p4.py b/src/models/NN/Type_2/dedicated_p4.py index 9d84adc..a4540ca 100755 --- a/src/models/NN/Type_2/dedicated_p4.py +++ b/src/models/NN/Type_2/dedicated_p4.py @@ -1,312 +1,312 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/NN/Type_2/readme.md b/src/models/NN/Type_2/readme.md index 955597d..0a8514a 100644 --- a/src/models/NN/Type_2/readme.md +++ b/src/models/NN/Type_2/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for NN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for NN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/NN/Type_2/table_generator.py b/src/models/NN/Type_2/table_generator.py index 3ae304d..498f0ab 100755 --- a/src/models/NN/Type_2/table_generator.py +++ b/src/models/NN/Type_2/table_generator.py @@ -1,360 +1,360 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import math - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - - -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -from src.models.NN.Type_2.BinaryNet.models.xnor_layers import * -from src.models.NN.Type_2.BinaryNet.models.xnor_mlp import * -from src.models.NN.Type_2.BinaryNet.models import * -from src.models.NN.Type_2.BinaryNet.classifiers.xnor_classifier import * -from sklearn.metrics import * -import torch.nn as nn -from torch.autograd import Variable as V -from torch.utils.data import DataLoader, Dataset, TensorDataset -import torch -from pathlib import Path -from datetime import datetime -import os - -import numpy as np -import pandas as pd -from scapy.compat import raw -from scapy.layers.inet import IP, UDP -from scapy.layers.l2 import Ether -from scapy.packet import Padding -from scipy import sparse - -from joblib import Parallel, delayed - -import tensorflow as tf -from tensorflow import keras -from tensorflow.keras import layers -from keras.constraints import maxnorm - -from sklearn.model_selection import train_test_split - -from imblearn.over_sampling import RandomOverSampler -from imblearn.under_sampling import RandomUnderSampler - -from keras.utils.np_utils import to_categorical -import matplotlib.pyplot as plt - -import seaborn as sn - - -def inttobit(n): - return [1 if digit == '1' else -1 for digit in '{0:056b}'.format(n)] - - -def bintoint(binary): - number = 0 - for b in binary: - number = (2 * number) + int(b) - return number - - -def conv_header(proto, sport, dport, size): - proto_bin = [int(x) for x in '{0:08b}'.format(proto)] - sport_bin = [int(x) for x in '{0:016b}'.format(sport)] - dport_bin = [int(x) for x in '{0:016b}'.format(dport)] - size_bin = [int(x) for x in '{0:016b}'.format(size)] - metric = proto_bin + sport_bin + dport_bin + size_bin - metric_int = bintoint(metric) - return metric, metric_int - - -# convert weights to binary weights and to integer -def convert_weight_to_file(weight_data, filename): - weight = [] - weights = [] - for i in weight_data: - for j in i: - if j < 0: - weight.append(0) - else: - weight.append(1) - weights.append(bintoint(weight)) - weight.clear() - txtfile = open(filename, "w") - for i in weights: - txtfile.write(str(i) + "\n") - txtfile.close() - - - - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - Planter_config['model config']['num components'] = int(input('- Number of components? (default = 2) ') or '2') - Planter_config['model config']['learning rate'] = np.float( - input('- Model learning rate? (default = 0.01) ') or '0.01') - Planter_config['model config']['batch size'] = int(input('- Model batch size? (default = 10) ') or '10') - Planter_config['model config']['num epoch'] = int(input('- Number of training epoch? (default = 5) ') or '5') - Planter_config['model config']['number of bits'] = int( - input('- Number of bits for each action data? (default = 16) ') or '16') - - num_bits = Planter_config['model config']['number of bits'] - - num_components = Planter_config['model config']['num components'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - learning_rate = Planter_config['model config']['learning rate'] - batch_size = Planter_config['model config']['batch size'] - num_epoch = Planter_config['model config']['num epoch'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - width = [] - for f in range(num_features): - width += [np.ceil(math.log(feature_max[f],2))] - width_row = int(np.sum(width)) - - - total_count = np.shape(train_X.values)[0] + np.shape(test_X.values)[0] - count = 0 - - train_X_new = [] - test_X_new = [] - for i in range(np.shape(train_X.values)[0]): - flag = 0 - row = (np.zeros(int(width_row))) - for f in range(num_features): - code = ten_to_bin(train_X.values[i][f],width[f]) - for d in range(int(width[f])): - row[flag] = int(code[d]) - flag += 1 - train_X_new += [row] - # ====================== - count += 1 - percent = int(np.ceil(50 * count / total_count)) - print('\rProcessing the raw Data [' + percent * '#' + (50 - percent) * '-' + '] ' + str( int(np.round(100 * count / total_count))) + "%", end="") - # ====================== - train_X_new = np.array(train_X_new) - - for i in range(np.shape(test_X.values)[0]): - flag = 0 - row = (np.zeros(int(width_row))) - for f in range(num_features): - code = ten_to_bin(test_X.values[i][f],width[f]) - for d in range(int(width[f])): - row[flag] = int(code[d]) - flag += 1 - test_X_new += [row] - # ====================== - count += 1 - percent = int(np.ceil(50 * count / total_count)) - print('\rProcessing the raw data [' + percent * '#' + (50 - percent) * '-' + '] ' + str(int(np.round(100 * count / total_count))) + "%", end="") - # ====================== - test_X_new = np.array(test_X_new) - print('\nData set is ready') - - - ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 - tensor_x = torch.from_numpy(train_X_new.astype(np.float32)) - tensor_y = torch.LongTensor(train_y.astype(np.float32)) - test_X = torch.from_numpy(test_X_new.astype(np.float32)) - test_y = torch.LongTensor(test_y.astype(np.float32)) - my_train_dataset = TensorDataset(tensor_x, tensor_y) - my_test_dataset = TensorDataset(test_X, test_y) - train_loader = DataLoader(my_train_dataset, batch_size=batch_size, shuffle=False) - test_loader = DataLoader(my_test_dataset, batch_size=batch_size, shuffle=False) - - # divide train and test data - x_train, x_test, y_train, y_test = train_test_split(np.array(train_X_new, dtype=np.float32), train_y, test_size=0.2, random_state=1) - x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1) # 0.25 x 0.8 = 0.2 - model = None - - model = keras.models.Sequential() - model.add(layers.Dropout(0.2)) - model.add(layers.Dense(56, input_dim=56, activation="tanh", name="dense_1", kernel_constraint=maxnorm(2))) - model.add(layers.Dropout(0.5)) - model.add(layers.Dense(56, activation="tanh", name="dense_2", kernel_constraint=maxnorm(2))) - model.add(layers.Dropout(0.5)) - model.add(layers.Dense(56, activation="tanh", name="dense_3", kernel_constraint=maxnorm(2))) - model.add(layers.Dropout(0.5)) - model.add(layers.Dense(56, activation="tanh", name="dense_4", kernel_constraint=maxnorm(2))) - model.add(layers.Dropout(0.5)) - model.add(layers.Dense(6, activation="softmax", name="predictions")) - - # define optimizer, loss function, and metric - model.compile( - optimizer=keras.optimizers.Adam(), # Optimizer - # Loss function to minimize - loss=keras.losses.SparseCategoricalCrossentropy(), - # List of metrics to monitor - metrics=[keras.metrics.SparseCategoricalAccuracy()] - # metrics=[keras.metrics.Accuracy()], - ) - - # fit training model - history = model.fit( - x_train, - y_train, - batch_size=128, - epochs=20, - verbose=2, - # We pass some validation for - # monitoring validation loss and metrics - # at the end of each epoch - validation_data=(x_val, y_val), - ) - - # transpose weight dimension - transposed_l1 = tf.transpose(model.trainable_variables[0]) - transposed_l2 = tf.transpose(model.trainable_variables[2]) - transposed_l3 = tf.transpose(model.trainable_variables[4]) - transposed_l4 = tf.transpose(model.trainable_variables[6]) - # transposed_l5 = tf.transpose(model.trainable_variables[8]) - - convert_weight_to_file(transposed_l1, "./src/temp/w-l1.txt") - convert_weight_to_file(transposed_l2, "./src/temp/w-l2.txt") - convert_weight_to_file(transposed_l3, "./src/temp/w-l3.txt") - convert_weight_to_file(transposed_l4, "./src/temp/w-l4.txt") - - - - exit() - return test_y.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import math + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + + +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +from src.models.NN.Type_2.BinaryNet.models.xnor_layers import * +from src.models.NN.Type_2.BinaryNet.models.xnor_mlp import * +from src.models.NN.Type_2.BinaryNet.models import * +from src.models.NN.Type_2.BinaryNet.classifiers.xnor_classifier import * +from sklearn.metrics import * +import torch.nn as nn +from torch.autograd import Variable as V +from torch.utils.data import DataLoader, Dataset, TensorDataset +import torch +from pathlib import Path +from datetime import datetime +import os + +import numpy as np +import pandas as pd +from scapy.compat import raw +from scapy.layers.inet import IP, UDP +from scapy.layers.l2 import Ether +from scapy.packet import Padding +from scipy import sparse + +from joblib import Parallel, delayed + +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers +from keras.constraints import maxnorm + +from sklearn.model_selection import train_test_split + +from imblearn.over_sampling import RandomOverSampler +from imblearn.under_sampling import RandomUnderSampler + +from keras.utils.np_utils import to_categorical +import matplotlib.pyplot as plt + +import seaborn as sn + + +def inttobit(n): + return [1 if digit == '1' else -1 for digit in '{0:056b}'.format(n)] + + +def bintoint(binary): + number = 0 + for b in binary: + number = (2 * number) + int(b) + return number + + +def conv_header(proto, sport, dport, size): + proto_bin = [int(x) for x in '{0:08b}'.format(proto)] + sport_bin = [int(x) for x in '{0:016b}'.format(sport)] + dport_bin = [int(x) for x in '{0:016b}'.format(dport)] + size_bin = [int(x) for x in '{0:016b}'.format(size)] + metric = proto_bin + sport_bin + dport_bin + size_bin + metric_int = bintoint(metric) + return metric, metric_int + + +# convert weights to binary weights and to integer +def convert_weight_to_file(weight_data, filename): + weight = [] + weights = [] + for i in weight_data: + for j in i: + if j < 0: + weight.append(0) + else: + weight.append(1) + weights.append(bintoint(weight)) + weight.clear() + txtfile = open(filename, "w") + for i in weights: + txtfile.write(str(i) + "\n") + txtfile.close() + + + + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + Planter_config['model config']['num components'] = int(input('- Number of components? (default = 2) ') or '2') + Planter_config['model config']['learning rate'] = np.float( + input('- Model learning rate? (default = 0.01) ') or '0.01') + Planter_config['model config']['batch size'] = int(input('- Model batch size? (default = 10) ') or '10') + Planter_config['model config']['num epoch'] = int(input('- Number of training epoch? (default = 5) ') or '5') + Planter_config['model config']['number of bits'] = int( + input('- Number of bits for each action data? (default = 16) ') or '16') + + num_bits = Planter_config['model config']['number of bits'] + + num_components = Planter_config['model config']['num components'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + learning_rate = Planter_config['model config']['learning rate'] + batch_size = Planter_config['model config']['batch size'] + num_epoch = Planter_config['model config']['num epoch'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + width = [] + for f in range(num_features): + width += [np.ceil(math.log(feature_max[f],2))] + width_row = int(np.sum(width)) + + + total_count = np.shape(train_X.values)[0] + np.shape(test_X.values)[0] + count = 0 + + train_X_new = [] + test_X_new = [] + for i in range(np.shape(train_X.values)[0]): + flag = 0 + row = (np.zeros(int(width_row))) + for f in range(num_features): + code = ten_to_bin(train_X.values[i][f],width[f]) + for d in range(int(width[f])): + row[flag] = int(code[d]) + flag += 1 + train_X_new += [row] + # ====================== + count += 1 + percent = int(np.ceil(50 * count / total_count)) + print('\rProcessing the raw Data [' + percent * '#' + (50 - percent) * '-' + '] ' + str( int(np.round(100 * count / total_count))) + "%", end="") + # ====================== + train_X_new = np.array(train_X_new) + + for i in range(np.shape(test_X.values)[0]): + flag = 0 + row = (np.zeros(int(width_row))) + for f in range(num_features): + code = ten_to_bin(test_X.values[i][f],width[f]) + for d in range(int(width[f])): + row[flag] = int(code[d]) + flag += 1 + test_X_new += [row] + # ====================== + count += 1 + percent = int(np.ceil(50 * count / total_count)) + print('\rProcessing the raw data [' + percent * '#' + (50 - percent) * '-' + '] ' + str(int(np.round(100 * count / total_count))) + "%", end="") + # ====================== + test_X_new = np.array(test_X_new) + print('\nData set is ready') + + + ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 + tensor_x = torch.from_numpy(train_X_new.astype(np.float32)) + tensor_y = torch.LongTensor(train_y.astype(np.float32)) + test_X = torch.from_numpy(test_X_new.astype(np.float32)) + test_y = torch.LongTensor(test_y.astype(np.float32)) + my_train_dataset = TensorDataset(tensor_x, tensor_y) + my_test_dataset = TensorDataset(test_X, test_y) + train_loader = DataLoader(my_train_dataset, batch_size=batch_size, shuffle=False) + test_loader = DataLoader(my_test_dataset, batch_size=batch_size, shuffle=False) + + # divide train and test data + x_train, x_test, y_train, y_test = train_test_split(np.array(train_X_new, dtype=np.float32), train_y, test_size=0.2, random_state=1) + x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=1) # 0.25 x 0.8 = 0.2 + model = None + + model = keras.models.Sequential() + model.add(layers.Dropout(0.2)) + model.add(layers.Dense(56, input_dim=56, activation="tanh", name="dense_1", kernel_constraint=maxnorm(2))) + model.add(layers.Dropout(0.5)) + model.add(layers.Dense(56, activation="tanh", name="dense_2", kernel_constraint=maxnorm(2))) + model.add(layers.Dropout(0.5)) + model.add(layers.Dense(56, activation="tanh", name="dense_3", kernel_constraint=maxnorm(2))) + model.add(layers.Dropout(0.5)) + model.add(layers.Dense(56, activation="tanh", name="dense_4", kernel_constraint=maxnorm(2))) + model.add(layers.Dropout(0.5)) + model.add(layers.Dense(6, activation="softmax", name="predictions")) + + # define optimizer, loss function, and metric + model.compile( + optimizer=keras.optimizers.Adam(), # Optimizer + # Loss function to minimize + loss=keras.losses.SparseCategoricalCrossentropy(), + # List of metrics to monitor + metrics=[keras.metrics.SparseCategoricalAccuracy()] + # metrics=[keras.metrics.Accuracy()], + ) + + # fit training model + history = model.fit( + x_train, + y_train, + batch_size=128, + epochs=20, + verbose=2, + # We pass some validation for + # monitoring validation loss and metrics + # at the end of each epoch + validation_data=(x_val, y_val), + ) + + # transpose weight dimension + transposed_l1 = tf.transpose(model.trainable_variables[0]) + transposed_l2 = tf.transpose(model.trainable_variables[2]) + transposed_l3 = tf.transpose(model.trainable_variables[4]) + transposed_l4 = tf.transpose(model.trainable_variables[6]) + # transposed_l5 = tf.transpose(model.trainable_variables[8]) + + convert_weight_to_file(transposed_l1, "./src/temp/w-l1.txt") + convert_weight_to_file(transposed_l2, "./src/temp/w-l2.txt") + convert_weight_to_file(transposed_l3, "./src/temp/w-l3.txt") + convert_weight_to_file(transposed_l4, "./src/temp/w-l4.txt") + + + + exit() + return test_y.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) print('\n', result) \ No newline at end of file diff --git a/src/models/NN/Type_DM/BinaryNet/README.md b/src/models/NN/Type_DM/BinaryNet/README.md index 2697020..d1c6639 100755 --- a/src/models/NN/Type_DM/BinaryNet/README.md +++ b/src/models/NN/Type_DM/BinaryNet/README.md @@ -1,52 +1,52 @@ -# Binary Neural Networks on PyTorch - -![Binarization](https://github.com/lucamocerino/Binary-Neural-Networks-PyTorch-1.0/blob/master/bin.png) - - -This repository implements three popular papers that introduced the concept of Binary Neural Networks: -- **XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks**: https://arxiv.org/abs/1603.05279. -- **Binarized Neural Networks** :https://papers.nips.cc/paper/6573-binarized-neural-networks -- **DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients** :https://arxiv.org/abs/1606.06160 - - - -The project is organized as follows: - - - **models** folder contains CNN models (simple mlp, Network-in-Network, LeNet5, etc.) - - **classifiers/{type}_classifier.py** contains the test and train procedures; where type = {bnn, xnor, dorefa} - - **models/{type}_layers.py** contains the binarylayers implementation (binary activation, binary conv and fully-connected layers, gradient update); where type = {bnn, xnor, dorefa} - - **yml** folder contains configuration files with hyperparameters - - **main.py** represents the entry file - -### Installation - -All packages are in *requirement.txt* -Install the dependencies: - -```sh -pip install -r requirements.txt -``` -### Basic usage -```sh -$ python main.py app:{yml_file} -``` -### Example -Network-in-Network on CIFAR10 dataset. All hyper parameters are in .yml file. -```sh -$ python main.py app:yml/nin_cifar10.yml -``` -## Related Applications -If you find this code useful in your research, please consider citing one of the works in this section. - - - Fast and Accurate Inference on Microcontrollers With Boosted Cooperative Convolutional Neural Networks (BC-Net) https://ieeexplore.ieee.org/abstract/document/9275360 - - CoopNet: Cooperative Convolutional Neural Network for Low-Power MCUs https://ieeexplore.ieee.org/abstract/document/8964993 - - TentacleNet: A Pseudo-Ensemble Template for Accurate Binary Convolutional Neural Networks https://ieeexplore.ieee.org/abstract/document/9073982/ - -License ----- - -MIT - - - - +# Binary Neural Networks on PyTorch + +![Binarization](https://github.com/lucamocerino/Binary-Neural-Networks-PyTorch-1.0/blob/master/bin.png) + + +This repository implements three popular papers that introduced the concept of Binary Neural Networks: +- **XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks**: https://arxiv.org/abs/1603.05279. +- **Binarized Neural Networks** :https://papers.nips.cc/paper/6573-binarized-neural-networks +- **DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients** :https://arxiv.org/abs/1606.06160 + + + +The project is organized as follows: + + - **models** folder contains CNN models (simple mlp, Network-in-Network, LeNet5, etc.) + - **classifiers/{type}_classifier.py** contains the test and train procedures; where type = {bnn, xnor, dorefa} + - **models/{type}_layers.py** contains the binarylayers implementation (binary activation, binary conv and fully-connected layers, gradient update); where type = {bnn, xnor, dorefa} + - **yml** folder contains configuration files with hyperparameters + - **main.py** represents the entry file + +### Installation + +All packages are in *requirement.txt* +Install the dependencies: + +```sh +pip install -r requirements.txt +``` +### Basic usage +```sh +$ python main.py app:{yml_file} +``` +### Example +Network-in-Network on CIFAR10 dataset. All hyper parameters are in .yml file. +```sh +$ python main.py app:yml/nin_cifar10.yml +``` +## Related Applications +If you find this code useful in your research, please consider citing one of the works in this section. + + - Fast and Accurate Inference on Microcontrollers With Boosted Cooperative Convolutional Neural Networks (BC-Net) https://ieeexplore.ieee.org/abstract/document/9275360 + - CoopNet: Cooperative Convolutional Neural Network for Low-Power MCUs https://ieeexplore.ieee.org/abstract/document/8964993 + - TentacleNet: A Pseudo-Ensemble Template for Accurate Binary Convolutional Neural Networks https://ieeexplore.ieee.org/abstract/document/9073982/ + +License +---- + +MIT + + + + diff --git a/src/models/NN/Type_DM/BinaryNet/classifiers/bnn_classifier.py b/src/models/NN/Type_DM/BinaryNet/classifiers/bnn_classifier.py index 22f271f..6087dd5 100755 --- a/src/models/NN/Type_DM/BinaryNet/classifiers/bnn_classifier.py +++ b/src/models/NN/Type_DM/BinaryNet/classifiers/bnn_classifier.py @@ -1,112 +1,112 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -import shutil - - -class BnnClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def top1_accuracy(self): - return top1_accuracy(self.model, self.test_loader, self.device) - - - def train_step(self, criterion, optimizer): - losses = [] - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - optimizer.zero_grad() - loss.backward() - for p in self.model.modules(): - if hasattr(p, 'weight_org'): - p.weight.data.copy_(p.weight_org) - optimizer.step() - for p in self.model.modules(): - if hasattr(p, 'weight_org'): - p.weight_org.data.copy_(p.weight.data.clamp_(-1,1)) - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +import shutil + + +class BnnClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def top1_accuracy(self): + return top1_accuracy(self.model, self.test_loader, self.device) + + + def train_step(self, criterion, optimizer): + losses = [] + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + optimizer.zero_grad() + loss.backward() + for p in self.model.modules(): + if hasattr(p, 'weight_org'): + p.weight.data.copy_(p.weight_org) + optimizer.step() + for p in self.model.modules(): + if hasattr(p, 'weight_org'): + p.weight_org.data.copy_(p.weight.data.clamp_(-1,1)) + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_DM/BinaryNet/classifiers/dorefa_classifier.py b/src/models/NN/Type_DM/BinaryNet/classifiers/dorefa_classifier.py index 2bb54b9..2f9a667 100755 --- a/src/models/NN/Type_DM/BinaryNet/classifiers/dorefa_classifier.py +++ b/src/models/NN/Type_DM/BinaryNet/classifiers/dorefa_classifier.py @@ -1,109 +1,109 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -import shutil - -class DorefaClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def train_step(self, criterion, optimizer): - losses = [] - self.model.train() - - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - - - data, target = data.to(self.device), target.to(self.device) - optimizer.zero_grad() - - - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - loss.backward() - - optimizer.step() - - - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +import shutil + +class DorefaClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def train_step(self, criterion, optimizer): + losses = [] + self.model.train() + + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + + + data, target = data.to(self.device), target.to(self.device) + optimizer.zero_grad() + + + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + loss.backward() + + optimizer.step() + + + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_DM/BinaryNet/classifiers/xnor_classifier.py b/src/models/NN/Type_DM/BinaryNet/classifiers/xnor_classifier.py index 9e133f2..de212d1 100755 --- a/src/models/NN/Type_DM/BinaryNet/classifiers/xnor_classifier.py +++ b/src/models/NN/Type_DM/BinaryNet/classifiers/xnor_classifier.py @@ -1,128 +1,128 @@ -import os -import numpy as np -from torch import save, no_grad -from tqdm import tqdm -from src.models.NN.Type_1.BinaryNet.models.xnor_layers import XNORConv2d -import shutil -from sklearn.metrics import * - -class XnorClassifier(): - def __init__(self, model, train_loader=None, test_loader=None, device=None): - super().__init__() - self.model = model - self.train_loader = train_loader - self.test_loader = test_loader - self.device = device - - @staticmethod - def save_checkpoint(state, is_best, checkpoint): - head, tail = os.path.split(checkpoint) - if not os.path.exists(head): - os.makedirs(head) - - filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) - save(state, filename) - if is_best: - shutil.copyfile(filename, os.path.join(head, - '{0}_best.pth.tar'.format(tail))) - - return - - def test(self, criterion): - self.model.eval() - top1 = 0 - test_loss = 0. - first = True - with no_grad(): - for data, target in tqdm(self.test_loader): - data, target = data.to(self.device), target.to(self.device) - output = self.model(data) - test_loss += criterion(output, target).item() - pred = output.argmax(dim=1, keepdim=True) - top1 += pred.eq(target.view_as(pred)).sum().item() - if first: - - nn_pred = pred[:,0].numpy() - label = target.numpy() - # print(nn_pred, output) - first = False - else: - # print(nn_pred, output) - nn_pred = np.hstack((nn_pred, pred[:,0].numpy())) - label = np.hstack((label, target.numpy())) - - result = classification_report(nn_pred, label, digits=4) - print('\n', result) - - top1_acc = 100. * top1 / len(self.test_loader.sampler) - - return top1_acc - - - def train_step(self, criterion, optimizer): - losses = [] - self.model.train() - - for data, target in tqdm(self.train_loader, - total=len(self.train_loader)): - - - data, target = data.to(self.device), target.to(self.device) - optimizer.zero_grad() - - - output = self.model(data) - loss = criterion(output, target) - losses.append(loss.item()) - loss.backward() - - for m in self.model.modules(): - if isinstance(m, XNORConv2d): - m.update_gradient() - - optimizer.step() - - - return losses - - def train(self, criterion, optimizer, epochs, scheduler, - checkpoint=None): - - if checkpoint is None: - raise ValueError('Specify a valid checkpoint') - - - best_accuracy = 0. - - losses = [] - accuracies = [] - - - - for epoch in range(1, epochs+1): - self.model.train() - epoch_losses = self.train_step(criterion, optimizer) - losses += epoch_losses - epoch_losses = np.array(epoch_losses) - lr = optimizer.param_groups[0]['lr'] - test_accuracy = self.test(criterion) - accuracies.append(test_accuracy) - if scheduler: - scheduler.step() - is_best = test_accuracy > best_accuracy - if is_best: - best_accuracy = test_accuracy - - print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' - .format(epoch, epoch_losses.mean(), test_accuracy, lr)) - print('Best accuracy: {:.3f} '.format(best_accuracy)) - - self.save_checkpoint({ - 'epoch': epoch+1, - 'state_dict': self.model.state_dict(), - 'best_accuracy': best_accuracy, - 'optimizer': optimizer.state_dict(), - 'criterion': criterion, - }, is_best, checkpoint) - - return +import os +import numpy as np +from torch import save, no_grad +from tqdm import tqdm +from src.models.NN.Type_1.BinaryNet.models.xnor_layers import XNORConv2d +import shutil +from sklearn.metrics import * + +class XnorClassifier(): + def __init__(self, model, train_loader=None, test_loader=None, device=None): + super().__init__() + self.model = model + self.train_loader = train_loader + self.test_loader = test_loader + self.device = device + + @staticmethod + def save_checkpoint(state, is_best, checkpoint): + head, tail = os.path.split(checkpoint) + if not os.path.exists(head): + os.makedirs(head) + + filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail)) + save(state, filename) + if is_best: + shutil.copyfile(filename, os.path.join(head, + '{0}_best.pth.tar'.format(tail))) + + return + + def test(self, criterion): + self.model.eval() + top1 = 0 + test_loss = 0. + first = True + with no_grad(): + for data, target in tqdm(self.test_loader): + data, target = data.to(self.device), target.to(self.device) + output = self.model(data) + test_loss += criterion(output, target).item() + pred = output.argmax(dim=1, keepdim=True) + top1 += pred.eq(target.view_as(pred)).sum().item() + if first: + + nn_pred = pred[:,0].numpy() + label = target.numpy() + # print(nn_pred, output) + first = False + else: + # print(nn_pred, output) + nn_pred = np.hstack((nn_pred, pred[:,0].numpy())) + label = np.hstack((label, target.numpy())) + + result = classification_report(nn_pred, label, digits=4) + print('\n', result) + + top1_acc = 100. * top1 / len(self.test_loader.sampler) + + return top1_acc + + + def train_step(self, criterion, optimizer): + losses = [] + self.model.train() + + for data, target in tqdm(self.train_loader, + total=len(self.train_loader)): + + + data, target = data.to(self.device), target.to(self.device) + optimizer.zero_grad() + + + output = self.model(data) + loss = criterion(output, target) + losses.append(loss.item()) + loss.backward() + + for m in self.model.modules(): + if isinstance(m, XNORConv2d): + m.update_gradient() + + optimizer.step() + + + return losses + + def train(self, criterion, optimizer, epochs, scheduler, + checkpoint=None): + + if checkpoint is None: + raise ValueError('Specify a valid checkpoint') + + + best_accuracy = 0. + + losses = [] + accuracies = [] + + + + for epoch in range(1, epochs+1): + self.model.train() + epoch_losses = self.train_step(criterion, optimizer) + losses += epoch_losses + epoch_losses = np.array(epoch_losses) + lr = optimizer.param_groups[0]['lr'] + test_accuracy = self.test(criterion) + accuracies.append(test_accuracy) + if scheduler: + scheduler.step() + is_best = test_accuracy > best_accuracy + if is_best: + best_accuracy = test_accuracy + + print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}' + .format(epoch, epoch_losses.mean(), test_accuracy, lr)) + print('Best accuracy: {:.3f} '.format(best_accuracy)) + + self.save_checkpoint({ + 'epoch': epoch+1, + 'state_dict': self.model.state_dict(), + 'best_accuracy': best_accuracy, + 'optimizer': optimizer.state_dict(), + 'criterion': criterion, + }, is_best, checkpoint) + + return diff --git a/src/models/NN/Type_DM/BinaryNet/config.py b/src/models/NN/Type_DM/BinaryNet/config.py index 51c5c8e..dd377fe 100755 --- a/src/models/NN/Type_DM/BinaryNet/config.py +++ b/src/models/NN/Type_DM/BinaryNet/config.py @@ -1,169 +1,169 @@ -"""config utilities for yml file.""" -import os -import sys -import yaml - -# singletone -FLAGS = None - - -class LoaderMeta(type): - """Constructor for supporting `!include`. - """ - def __new__(mcs, __name__, __bases__, __dict__): - """Add include constructer to class.""" - # register the include constructor on the class - cls = super().__new__(mcs, __name__, __bases__, __dict__) - cls.add_constructor('!include', cls.construct_include) - return cls - - -class Loader(yaml.Loader, metaclass=LoaderMeta): - """YAML Loader with `!include` constructor. - """ - def __init__(self, stream): - try: - self._root = os.path.split(stream.name)[0] - except AttributeError: - self._root = os.path.curdir - super().__init__(stream) - - def construct_include(self, node): - """Include file referenced at node.""" - filename = os.path.abspath( - os.path.join(self._root, self.construct_scalar(node))) - extension = os.path.splitext(filename)[1].lstrip('.') - with open(filename, 'r') as f: - if extension in ('yaml', 'yml'): - return yaml.load(f, Loader) - else: - return ''.join(f.readlines()) - - -class AttrDict(dict): - """Dict as attribute trick. - - """ - def __init__(self, *args, **kwargs): - super(AttrDict, self).__init__(*args, **kwargs) - self.__dict__ = self - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, dict): - self.__dict__[key] = AttrDict(value) - elif isinstance(value, list): - if isinstance(value[0], dict): - self.__dict__[key] = [AttrDict(item) for item in value] - else: - self.__dict__[key] = value - - def yaml(self): - """Convert object to yaml dict and return. - - """ - yaml_dict = {} - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, AttrDict): - yaml_dict[key] = value.yaml() - elif isinstance(value, list): - if isinstance(value[0], AttrDict): - new_l = [] - for item in value: - new_l.append(item.yaml()) - yaml_dict[key] = new_l - else: - yaml_dict[key] = value - else: - yaml_dict[key] = value - return yaml_dict - - def __repr__(self): - """Print all variables. - - """ - ret_str = [] - for key in self.__dict__: - value = self.__dict__[key] - if isinstance(value, AttrDict): - ret_str.append('{}:'.format(key)) - child_ret_str = value.__repr__().split('\n') - for item in child_ret_str: - ret_str.append(' ' + item) - elif isinstance(value, list): - if isinstance(value[0], AttrDict): - ret_str.append('{}:'.format(key)) - for item in value: - # treat as AttrDict above - child_ret_str = item.__repr__().split('\n') - for item in child_ret_str: - ret_str.append(' ' + item) - else: - ret_str.append('{}: {}'.format(key, value)) - else: - ret_str.append('{}: {}'.format(key, value)) - return '\n'.join(ret_str) - - -class Config(AttrDict): - """Config with yaml file. - - This class is used to config model hyper-parameters, global constants, and - other settings with yaml file. All settings in yaml file will be - automatically logged into file. - - Args: - filename(str): File name. - - Examples: - - yaml file ``model.yml``:: - - NAME: 'neuralgym' - ALPHA: 1.0 - DATASET: '/mnt/data/imagenet' - - Usage in .py: - - >>> from neuralgym import Config - >>> config = Config('model.yml') - >>> print(config.NAME) - neuralgym - >>> print(config.ALPHA) - 1.0 - >>> print(config.DATASET) - /mnt/data/imagenet - - """ - - def __init__(self, filename=None, verbose=False): - assert os.path.exists(filename), 'File {} not exist.'.format(filename) - try: - with open(filename, 'r') as f: - cfg_dict = yaml.load(f, Loader) - except EnvironmentError: - print('Please check the file with name of "%s"', filename) - super(Config, self).__init__(cfg_dict) - if verbose: - print(' pi.cfg '.center(80, '-')) - print(self.__repr__()) - print(''.center(80, '-')) - - -def app(): - """Load app via stdin from subprocess""" - global FLAGS - if FLAGS is None: - job_yaml_file = None - for arg in sys.argv: - if arg.startswith('app:'): - job_yaml_file = arg[4:] - if job_yaml_file is None: - job_yaml_file = sys.stdin.readline() - FLAGS = Config(job_yaml_file) - return FLAGS - else: - return FLAGS - - -app() +"""config utilities for yml file.""" +import os +import sys +import yaml + +# singletone +FLAGS = None + + +class LoaderMeta(type): + """Constructor for supporting `!include`. + """ + def __new__(mcs, __name__, __bases__, __dict__): + """Add include constructer to class.""" + # register the include constructor on the class + cls = super().__new__(mcs, __name__, __bases__, __dict__) + cls.add_constructor('!include', cls.construct_include) + return cls + + +class Loader(yaml.Loader, metaclass=LoaderMeta): + """YAML Loader with `!include` constructor. + """ + def __init__(self, stream): + try: + self._root = os.path.split(stream.name)[0] + except AttributeError: + self._root = os.path.curdir + super().__init__(stream) + + def construct_include(self, node): + """Include file referenced at node.""" + filename = os.path.abspath( + os.path.join(self._root, self.construct_scalar(node))) + extension = os.path.splitext(filename)[1].lstrip('.') + with open(filename, 'r') as f: + if extension in ('yaml', 'yml'): + return yaml.load(f, Loader) + else: + return ''.join(f.readlines()) + + +class AttrDict(dict): + """Dict as attribute trick. + + """ + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, dict): + self.__dict__[key] = AttrDict(value) + elif isinstance(value, list): + if isinstance(value[0], dict): + self.__dict__[key] = [AttrDict(item) for item in value] + else: + self.__dict__[key] = value + + def yaml(self): + """Convert object to yaml dict and return. + + """ + yaml_dict = {} + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, AttrDict): + yaml_dict[key] = value.yaml() + elif isinstance(value, list): + if isinstance(value[0], AttrDict): + new_l = [] + for item in value: + new_l.append(item.yaml()) + yaml_dict[key] = new_l + else: + yaml_dict[key] = value + else: + yaml_dict[key] = value + return yaml_dict + + def __repr__(self): + """Print all variables. + + """ + ret_str = [] + for key in self.__dict__: + value = self.__dict__[key] + if isinstance(value, AttrDict): + ret_str.append('{}:'.format(key)) + child_ret_str = value.__repr__().split('\n') + for item in child_ret_str: + ret_str.append(' ' + item) + elif isinstance(value, list): + if isinstance(value[0], AttrDict): + ret_str.append('{}:'.format(key)) + for item in value: + # treat as AttrDict above + child_ret_str = item.__repr__().split('\n') + for item in child_ret_str: + ret_str.append(' ' + item) + else: + ret_str.append('{}: {}'.format(key, value)) + else: + ret_str.append('{}: {}'.format(key, value)) + return '\n'.join(ret_str) + + +class Config(AttrDict): + """Config with yaml file. + + This class is used to config model hyper-parameters, global constants, and + other settings with yaml file. All settings in yaml file will be + automatically logged into file. + + Args: + filename(str): File name. + + Examples: + + yaml file ``model.yml``:: + + NAME: 'neuralgym' + ALPHA: 1.0 + DATASET: '/mnt/data/imagenet' + + Usage in .py: + + >>> from neuralgym import Config + >>> config = Config('model.yml') + >>> print(config.NAME) + neuralgym + >>> print(config.ALPHA) + 1.0 + >>> print(config.DATASET) + /mnt/data/imagenet + + """ + + def __init__(self, filename=None, verbose=False): + assert os.path.exists(filename), 'File {} not exist.'.format(filename) + try: + with open(filename, 'r') as f: + cfg_dict = yaml.load(f, Loader) + except EnvironmentError: + print('Please check the file with name of "%s"', filename) + super(Config, self).__init__(cfg_dict) + if verbose: + print(' pi.cfg '.center(80, '-')) + print(self.__repr__()) + print(''.center(80, '-')) + + +def app(): + """Load app via stdin from subprocess""" + global FLAGS + if FLAGS is None: + job_yaml_file = None + for arg in sys.argv: + if arg.startswith('app:'): + job_yaml_file = arg[4:] + if job_yaml_file is None: + job_yaml_file = sys.stdin.readline() + FLAGS = Config(job_yaml_file) + return FLAGS + else: + return FLAGS + + +app() diff --git a/src/models/NN/Type_DM/BinaryNet/dataloader/__init__.py b/src/models/NN/Type_DM/BinaryNet/dataloader/__init__.py index 788a17b..d210291 100755 --- a/src/models/NN/Type_DM/BinaryNet/dataloader/__init__.py +++ b/src/models/NN/Type_DM/BinaryNet/dataloader/__init__.py @@ -1,2 +1,2 @@ -from .cifar10 import * -from .mnist import * +from .cifar10 import * +from .mnist import * diff --git a/src/models/NN/Type_DM/BinaryNet/dataloader/cifar10.py b/src/models/NN/Type_DM/BinaryNet/dataloader/cifar10.py index ec1333a..9c18a8e 100755 --- a/src/models/NN/Type_DM/BinaryNet/dataloader/cifar10.py +++ b/src/models/NN/Type_DM/BinaryNet/dataloader/cifar10.py @@ -1,39 +1,39 @@ -import os -import torch -from torchvision.datasets import CIFAR10 -import torchvision.transforms as tvt - - -def load_train_data(batch_size=64, sampler=None): - transform = tvt.Compose([ - tvt.RandomCrop(32, padding=4), - tvt.RandomHorizontalFlip(), - tvt.ToTensor(), - tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - - if sampler is None: - shuffle = True - else: - shuffle = False - - dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=True, - download=True, transform=transform) - loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, - shuffle=shuffle, sampler=sampler, num_workers=4, pin_memory=True) - - return loader - - -def load_test_data(batch_size=1000, sampler=None): - transform = tvt.Compose([ - tvt.ToTensor(), - tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), - ]) - - dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=False, - download=True, transform=transform) - loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, - shuffle=False, sampler=sampler, num_workers=4, pin_memory=True) - - return loader +import os +import torch +from torchvision.datasets import CIFAR10 +import torchvision.transforms as tvt + + +def load_train_data(batch_size=64, sampler=None): + transform = tvt.Compose([ + tvt.RandomCrop(32, padding=4), + tvt.RandomHorizontalFlip(), + tvt.ToTensor(), + tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + + if sampler is None: + shuffle = True + else: + shuffle = False + + dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=True, + download=True, transform=transform) + loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, + shuffle=shuffle, sampler=sampler, num_workers=4, pin_memory=True) + + return loader + + +def load_test_data(batch_size=1000, sampler=None): + transform = tvt.Compose([ + tvt.ToTensor(), + tvt.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), + ]) + + dataset = CIFAR10(os.path.join('datasets', 'cifar10'), train=False, + download=True, transform=transform) + loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, + shuffle=False, sampler=sampler, num_workers=4, pin_memory=True) + + return loader diff --git a/src/models/NN/Type_DM/BinaryNet/dataloader/mnist.py b/src/models/NN/Type_DM/BinaryNet/dataloader/mnist.py index 7dcf662..86f69ad 100755 --- a/src/models/NN/Type_DM/BinaryNet/dataloader/mnist.py +++ b/src/models/NN/Type_DM/BinaryNet/dataloader/mnist.py @@ -1,38 +1,38 @@ -from torch.utils.data import DataLoader -from os.path import join -from torchvision.datasets import MNIST -from torchvision.transforms import Compose, Resize, Normalize, ToTensor - - -def load_train_data(batch_size=128, sampler=None): - cuda = True - loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} - - train_loader = DataLoader( - MNIST(join('datasets', 'mnist'), train=True, download=True, - transform=Compose([ - Resize((28, 28)), - ToTensor(), - Normalize((0.1307,),(0.308,)), - ])), - batch_size=batch_size, shuffle=True, **loader_kwargs) - - return train_loader - -def load_test_data(batch_size=1000, sampler=None): - - cuda = True - loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} - - test_loader = DataLoader( - MNIST(join('datasets', 'mnist'), train=False, download=True, - transform=Compose([ - Resize((28, 28)), - ToTensor(), - Normalize((0.1307,),(0.308,)), - ])), - batch_size= batch_size, shuffle=False,sampler=sampler, **loader_kwargs) - - return test_loader - - +from torch.utils.data import DataLoader +from os.path import join +from torchvision.datasets import MNIST +from torchvision.transforms import Compose, Resize, Normalize, ToTensor + + +def load_train_data(batch_size=128, sampler=None): + cuda = True + loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} + + train_loader = DataLoader( + MNIST(join('datasets', 'mnist'), train=True, download=True, + transform=Compose([ + Resize((28, 28)), + ToTensor(), + Normalize((0.1307,),(0.308,)), + ])), + batch_size=batch_size, shuffle=True, **loader_kwargs) + + return train_loader + +def load_test_data(batch_size=1000, sampler=None): + + cuda = True + loader_kwargs = {'num_workers': 0, 'pin_memory': True} if cuda else {} + + test_loader = DataLoader( + MNIST(join('datasets', 'mnist'), train=False, download=True, + transform=Compose([ + Resize((28, 28)), + ToTensor(), + Normalize((0.1307,),(0.308,)), + ])), + batch_size= batch_size, shuffle=False,sampler=sampler, **loader_kwargs) + + return test_loader + + diff --git a/src/models/NN/Type_DM/BinaryNet/main.py b/src/models/NN/Type_DM/BinaryNet/main.py index a174316..cf8da5c 100755 --- a/src/models/NN/Type_DM/BinaryNet/main.py +++ b/src/models/NN/Type_DM/BinaryNet/main.py @@ -1,49 +1,49 @@ -import torch -from classifiers.xnor_classifier import * -from classifiers.dorefa_classifier import * -from classifiers.bnn_classifier import * -from config import FLAGS -import importlib -from models import * - -cuda = torch.cuda.is_available() and not(FLAGS.no_cuda) -device = torch.device('cuda' if cuda else 'cpu') -torch.manual_seed(0) -if cuda: - torch.backends.cudnn.deterministic=True - torch.cuda.manual_seed(0) - -dataset = importlib.import_module("dataloader.{}".format(FLAGS.dataset)) -train_loader = dataset.load_train_data(FLAGS.batch_size) -test_loader = dataset.load_test_data(FLAGS.test_batch_size) - -model = eval(FLAGS.model)() -model.to(device) - - -if FLAGS.bin_type == 'xnor': - classification = XnorClassifier(model, train_loader, test_loader, device) - -elif FLAGS.bin_type == 'bnn': - classification = BnnClassifier(model, train_loader, test_loader, device) - -elif FLAGS.bin_type == 'dorefa': - classification = DorefaClassifier(model, train_loader, test_loader, device) - -criterion = torch.nn.CrossEntropyLoss() -criterion.to(device) - -if hasattr(model, 'init_w'): - model.init_w() - - -if FLAGS.optimizer == 'adam': - optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr, weight_decay=1e-5) -elif FLAGS.optimizer == 'sgd': - optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0.9, - weight_decay=5.e-4) - -scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, FLAGS.steps, - gamma=FLAGS.gamma) - -classification.train(criterion, optimizer, FLAGS.epochs, scheduler, FLAGS.checkpoint) +import torch +from classifiers.xnor_classifier import * +from classifiers.dorefa_classifier import * +from classifiers.bnn_classifier import * +from config import FLAGS +import importlib +from models import * + +cuda = torch.cuda.is_available() and not(FLAGS.no_cuda) +device = torch.device('cuda' if cuda else 'cpu') +torch.manual_seed(0) +if cuda: + torch.backends.cudnn.deterministic=True + torch.cuda.manual_seed(0) + +dataset = importlib.import_module("dataloader.{}".format(FLAGS.dataset)) +train_loader = dataset.load_train_data(FLAGS.batch_size) +test_loader = dataset.load_test_data(FLAGS.test_batch_size) + +model = eval(FLAGS.model)() +model.to(device) + + +if FLAGS.bin_type == 'xnor': + classification = XnorClassifier(model, train_loader, test_loader, device) + +elif FLAGS.bin_type == 'bnn': + classification = BnnClassifier(model, train_loader, test_loader, device) + +elif FLAGS.bin_type == 'dorefa': + classification = DorefaClassifier(model, train_loader, test_loader, device) + +criterion = torch.nn.CrossEntropyLoss() +criterion.to(device) + +if hasattr(model, 'init_w'): + model.init_w() + + +if FLAGS.optimizer == 'adam': + optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr, weight_decay=1e-5) +elif FLAGS.optimizer == 'sgd': + optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.lr, momentum=0.9, + weight_decay=5.e-4) + +scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, FLAGS.steps, + gamma=FLAGS.gamma) + +classification.train(criterion, optimizer, FLAGS.epochs, scheduler, FLAGS.checkpoint) diff --git a/src/models/NN/Type_DM/BinaryNet/models/__init__.py b/src/models/NN/Type_DM/BinaryNet/models/__init__.py index 20eb788..923977d 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/__init__.py +++ b/src/models/NN/Type_DM/BinaryNet/models/__init__.py @@ -1,5 +1,5 @@ -from .xnor_nin import * -from .xnor_lenet import * -from .xnor_mlp import * -from .dorefa_resnet import * -from .bnn_caffenet import * +from .xnor_nin import * +from .xnor_lenet import * +from .xnor_mlp import * +from .dorefa_resnet import * +from .bnn_caffenet import * diff --git a/src/models/NN/Type_DM/BinaryNet/models/bnn_caffenet.py b/src/models/NN/Type_DM/BinaryNet/models/bnn_caffenet.py index c215779..341f43c 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/bnn_caffenet.py +++ b/src/models/NN/Type_DM/BinaryNet/models/bnn_caffenet.py @@ -1,61 +1,61 @@ -import torch.nn as nn -from .bnn_layers import * - - -__all__ = ['bnn_caffenet'] - - - -class BNNCaffenet(nn.Module): - - def __init__(self, num_classes=10): - super(BNNCaffenet, self).__init__() - - self.features = nn.Sequential( - - BNNConv2d(3, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), - nn.BatchNorm2d(32), - nn.Hardtanh(inplace=True), - nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), - - nn.Flatten(), - nn.BatchNorm1d(512), - nn.Hardtanh(inplace=True), - BNNLinear(512, num_classes), - nn.BatchNorm1d(num_classes, affine=False), - nn.LogSoftmax(dim=1), - ) - - def forward(self, x): - return self.features(x) - - - def init_w(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - return - - -def bnn_caffenet(num_classes=10): - return BNNCaffenet(num_classes) - +import torch.nn as nn +from .bnn_layers import * + + +__all__ = ['bnn_caffenet'] + + + +class BNNCaffenet(nn.Module): + + def __init__(self, num_classes=10): + super(BNNCaffenet, self).__init__() + + self.features = nn.Sequential( + + BNNConv2d(3, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + BNNConv2d(32, 32, kernel_size=5, stride=1, padding=2, bias=False), + nn.BatchNorm2d(32), + nn.Hardtanh(inplace=True), + nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True), + + nn.Flatten(), + nn.BatchNorm1d(512), + nn.Hardtanh(inplace=True), + BNNLinear(512, num_classes), + nn.BatchNorm1d(num_classes, affine=False), + nn.LogSoftmax(dim=1), + ) + + def forward(self, x): + return self.features(x) + + + def init_w(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + return + + +def bnn_caffenet(num_classes=10): + return BNNCaffenet(num_classes) + diff --git a/src/models/NN/Type_DM/BinaryNet/models/bnn_layers.py b/src/models/NN/Type_DM/BinaryNet/models/bnn_layers.py index 14a3879..760e124 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/bnn_layers.py +++ b/src/models/NN/Type_DM/BinaryNet/models/bnn_layers.py @@ -1,62 +1,62 @@ -import torch -from torch.nn import Module, Conv2d, Linear -from torch.nn.functional import linear, conv2d - - -__all__ = ['BNNLinear', 'BNNConv2d'] - - - - -def Binarize(tensor,quant_mode='det'): - if quant_mode=='det': - return tensor.sign() - if quant_mode=='bin': - return (tensor>=0).type(type(tensor))*2-1 - else: - return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) - - -class BNNLinear(Linear): - - def __init__(self, *kargs, **kwargs): - super(BNNLinear, self).__init__(*kargs, **kwargs) - self.register_buffer('weight_org', self.weight.data.clone()) - - def forward(self, input): - - if (input.size(1) != 784) and (input.size(1) != 3072): - input.data=Binarize(input.data) - - self.weight.data=Binarize(self.weight_org) - out = linear(input, self.weight) - - if not self.bias is None: - self.bias.org=self.bias.data.clone() - out += self.bias.view(1, -1).expand_as(out) - - return out - - -class BNNConv2d(Conv2d): - - def __init__(self, *kargs, **kwargs): - super(BNNConv2d, self).__init__(*kargs, **kwargs) - self.register_buffer('weight_org', self.weight.data.clone()) - - def forward(self, input): - if input.size(1) != 3: - input.data = Binarize(input.data) - - self.weight.data=Binarize(self.weight_org) - - - out = conv2d(input, self.weight, None, self.stride, - self.padding, self.dilation, self.groups) - - if not self.bias is None: - self.bias.org=self.bias.data.clone() - out += self.bias.view(1, -1, 1, 1).expand_as(out) - - return out - +import torch +from torch.nn import Module, Conv2d, Linear +from torch.nn.functional import linear, conv2d + + +__all__ = ['BNNLinear', 'BNNConv2d'] + + + + +def Binarize(tensor,quant_mode='det'): + if quant_mode=='det': + return tensor.sign() + if quant_mode=='bin': + return (tensor>=0).type(type(tensor))*2-1 + else: + return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1) + + +class BNNLinear(Linear): + + def __init__(self, *kargs, **kwargs): + super(BNNLinear, self).__init__(*kargs, **kwargs) + self.register_buffer('weight_org', self.weight.data.clone()) + + def forward(self, input): + + if (input.size(1) != 784) and (input.size(1) != 3072): + input.data=Binarize(input.data) + + self.weight.data=Binarize(self.weight_org) + out = linear(input, self.weight) + + if not self.bias is None: + self.bias.org=self.bias.data.clone() + out += self.bias.view(1, -1).expand_as(out) + + return out + + +class BNNConv2d(Conv2d): + + def __init__(self, *kargs, **kwargs): + super(BNNConv2d, self).__init__(*kargs, **kwargs) + self.register_buffer('weight_org', self.weight.data.clone()) + + def forward(self, input): + if input.size(1) != 3: + input.data = Binarize(input.data) + + self.weight.data=Binarize(self.weight_org) + + + out = conv2d(input, self.weight, None, self.stride, + self.padding, self.dilation, self.groups) + + if not self.bias is None: + self.bias.org=self.bias.data.clone() + out += self.bias.view(1, -1, 1, 1).expand_as(out) + + return out + diff --git a/src/models/NN/Type_DM/BinaryNet/models/dorefa_layers.py b/src/models/NN/Type_DM/BinaryNet/models/dorefa_layers.py index b1dad33..9388b5b 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/dorefa_layers.py +++ b/src/models/NN/Type_DM/BinaryNet/models/dorefa_layers.py @@ -1,110 +1,110 @@ -import torch -import numpy as np -from torch.autograd import Function -from torch.nn import Conv2d, Linear -from torch.nn.functional import linear, conv2d - -__all__ = ['DOREFAConv2d','DOREFALinear'] - - -class ScaleSigner(Function): - """take a real value x, output sign(x)*E(|x|)""" - @staticmethod - def forward(ctx, input): - return torch.sign(input) * torch.mean(torch.abs(input)) - - @staticmethod - def backward(ctx, grad_output): - return grad_output - - -def scale_sign(input): - return ScaleSigner.apply(input) - - -class Quantizer(Function): - @staticmethod - def forward(ctx, input, nbit): - scale = 2 ** nbit - 1 - return torch.round(input * scale) / scale - - @staticmethod - def backward(ctx, grad_output): - return grad_output, None - - -def quantize(input, nbit): - return Quantizer.apply(input, nbit) - - -def dorefa_w(w, nbit_w): - if nbit_w == 1: - w = scale_sign(w) - else: - w = torch.tanh(w) - w = w / (2 * torch.max(torch.abs(w))) + 0.5 - w = 2 * quantize(w, nbit_w) - 1 - - return w - - -def dorefa_a(input, nbit_a): - return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) - - -class DOREFAConv2d(Conv2d): - """docstring for QuanConv""" - def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, - nbit_a=1, stride=1, - padding=0, dilation=1, groups=1, - bias=True): - super(DOREFAConv2d, self).__init__( - in_channels, out_channels, kernel_size, stride, padding, dilation, - groups, bias) - self.nbit_w = nbit_w - self.nbit_a = nbit_a - name_w_dict = {'dorefa': dorefa_w} - name_a_dict = {'dorefa': dorefa_a} - self.quan_w = name_w_dict[quan_name_w] - self.quan_a = name_a_dict[quan_name_a] - - def forward(self, input): - if self.nbit_w < 32: - w = self.quan_w(self.weight, self.nbit_w) - else: - w = self.weight - - if self.nbit_a < 32: - x = self.quan_a(input, self.nbit_a) - else: - x = input - - output = conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) - - return output - -class DOREFALinear(Linear): - def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, nbit_a=1): - super(DOREFALinear, self).__init__(in_features, out_features, bias) - self.nbit_w = nbit_w - self.nbit_a = nbit_a - name_w_dict = {'dorefa': dorefa_w} - name_a_dict = {'dorefa': dorefa_a} - self.quan_w = name_w_dict[quan_name_w] - self.quan_a = name_a_dict[quan_name_a] - - def forward(self, input): - if self.nbit_w < 32: - w = self.quan_w(self.weight, self.nbit_w) - else: - w = self.weight - - if self.nbit_a < 32: - x = self.quan_a(input, self.nbit_a) - else: - x = input - - - output = linear(x, w, self.bias) - - return output +import torch +import numpy as np +from torch.autograd import Function +from torch.nn import Conv2d, Linear +from torch.nn.functional import linear, conv2d + +__all__ = ['DOREFAConv2d','DOREFALinear'] + + +class ScaleSigner(Function): + """take a real value x, output sign(x)*E(|x|)""" + @staticmethod + def forward(ctx, input): + return torch.sign(input) * torch.mean(torch.abs(input)) + + @staticmethod + def backward(ctx, grad_output): + return grad_output + + +def scale_sign(input): + return ScaleSigner.apply(input) + + +class Quantizer(Function): + @staticmethod + def forward(ctx, input, nbit): + scale = 2 ** nbit - 1 + return torch.round(input * scale) / scale + + @staticmethod + def backward(ctx, grad_output): + return grad_output, None + + +def quantize(input, nbit): + return Quantizer.apply(input, nbit) + + +def dorefa_w(w, nbit_w): + if nbit_w == 1: + w = scale_sign(w) + else: + w = torch.tanh(w) + w = w / (2 * torch.max(torch.abs(w))) + 0.5 + w = 2 * quantize(w, nbit_w) - 1 + + return w + + +def dorefa_a(input, nbit_a): + return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a) + + +class DOREFAConv2d(Conv2d): + """docstring for QuanConv""" + def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, + nbit_a=1, stride=1, + padding=0, dilation=1, groups=1, + bias=True): + super(DOREFAConv2d, self).__init__( + in_channels, out_channels, kernel_size, stride, padding, dilation, + groups, bias) + self.nbit_w = nbit_w + self.nbit_a = nbit_a + name_w_dict = {'dorefa': dorefa_w} + name_a_dict = {'dorefa': dorefa_a} + self.quan_w = name_w_dict[quan_name_w] + self.quan_a = name_a_dict[quan_name_a] + + def forward(self, input): + if self.nbit_w < 32: + w = self.quan_w(self.weight, self.nbit_w) + else: + w = self.weight + + if self.nbit_a < 32: + x = self.quan_a(input, self.nbit_a) + else: + x = input + + output = conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups) + + return output + +class DOREFALinear(Linear): + def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=1, nbit_a=1): + super(DOREFALinear, self).__init__(in_features, out_features, bias) + self.nbit_w = nbit_w + self.nbit_a = nbit_a + name_w_dict = {'dorefa': dorefa_w} + name_a_dict = {'dorefa': dorefa_a} + self.quan_w = name_w_dict[quan_name_w] + self.quan_a = name_a_dict[quan_name_a] + + def forward(self, input): + if self.nbit_w < 32: + w = self.quan_w(self.weight, self.nbit_w) + else: + w = self.weight + + if self.nbit_a < 32: + x = self.quan_a(input, self.nbit_a) + else: + x = input + + + output = linear(x, w, self.bias) + + return output diff --git a/src/models/NN/Type_DM/BinaryNet/models/dorefa_resnet.py b/src/models/NN/Type_DM/BinaryNet/models/dorefa_resnet.py index 92ebbb9..3f426d6 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/dorefa_resnet.py +++ b/src/models/NN/Type_DM/BinaryNet/models/dorefa_resnet.py @@ -1,154 +1,154 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .dorefa_layers import DOREFAConv2d as Conv -from .dorefa_layers import DOREFALinear as Linear - -__all__ = ['dorefa_resnet18'] - - -def conv3x3(in_planes, out_planes, wbit, abit, stride=1): - """3x3 convolution with padding""" - return Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, nbit_w=wbit, nbit_a=abit) - - -def conv1x1(in_planes, out_planes, wbit, abit, stride=1): - """1x1 convolution""" - return Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, nbit_w=wbit, nbit_a=abit) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, in_planes, planes, wbit, abit, sparsity_list, stride=1): - super(BasicBlock, self).__init__() - - self.bb = nn.Sequential( - conv3x3(in_planes, planes, wbit=wbit, abit=abit, stride=stride), - nn.BatchNorm2d(planes), - nn.ReLU(inplace=True), - conv3x3(planes, planes, wbit=wbit, abit=abit, stride=1), - nn.BatchNorm2d(planes), - ) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - conv1x1(in_planes, self.expansion*planes, wbit=wbit, abit=abit, stride=stride), - nn.BatchNorm2d(self.expansion*planes,sparsity_list) - ) - - def forward(self, x): - out = self.bb(x) - out += self.shortcut(x) - out = F.relu(out) - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, in_planes, planes, wbit, abit, stride=1): - super(Bottleneck, self).__init__() - self.conv1 = conv1x1(in_planes, planes, wbit=wbit, abit=abit, stride=1) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = conv3x3(planes, planes, wbit=wbit, abit=abit, stride=stride) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = conv1x1(planes, self.expansion*planes,wbit=wbit, abit=abit, stride=1) - self.bn3 = nn.BatchNorm2d(self.expansion*planes) - - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion*planes: - self.shortcut = nn.Sequential( - conv1x1(in_planes, self.expansion*planes,wbit=wbit,abit=abit,stride=stride), - nn.BatchNorm2d(self.expansion*planes) - ) - - def forward(self, x): - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - out += self.shortcut(x) - out = F.relu(out) - return out - - - - - - -class ResNet(nn.Module): - def __init__(self, block, num_blocks, wbit=1, abit=1, num_classes=10): - super(ResNet, self).__init__() - self.in_planes = 64 - - self.head = nn.Sequential( - nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), - nn.BatchNorm2d(64), - nn.ReLU(inplace=True), - ) - - self.layer1 = self._make_layer(block, 64, num_blocks[0], wbit=wbit, abit=abit, stride=1) - self.layer2 = self._make_layer(block, 128, num_blocks[1], wbit=wbit, abit=abit, stride=2) - self.layer3 = self._make_layer(block, 256, num_blocks[2], wbit=wbit, abit=abit, stride=2) - self.layer4 = self._make_layer(block, 512, num_blocks[3], wbit=wbit, abit=abit, stride=2) - - self.tail = nn.Sequential( - nn.AdaptiveAvgPool2d(1), - nn.Flatten(), - nn.Linear(512*block.expansion, num_classes), - ) - - def init_w(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out') - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - nn.init.zeros_(m.bias) - return - - def _make_layer(self, block, planes, num_blocks, wbit, abit, stride): - strides = [stride] + [1]*(num_blocks-1) - layers = [] - for stride in strides: - layers.append(block(self.in_planes, planes, wbit, abit, stride)) - self.in_planes = planes * block.expansion - return nn.Sequential(*layers) - - def forward(self, x): - - out = self.head(x) - out = self.layer1(out) - out = self.layer2(out) - out = self.layer3(out) - out = self.layer4(out) - out = self.tail(out) - return out - - - - -def dorefa_resnet18(wbit=1, abit=1): - return ResNet(BasicBlock, [2,2,2,2], wbit=wbit, abit=abit) - -def ResNet34(wbit, abit): - return ResNet(BasicBlock, [3,4,6,3], wbit=wbit, abit=abit) - -def ResNet50(wbit, abit): - return ResNet(Bottleneck, [3,4,6,3], wbit=wbit, abit=abit) - -def ResNet101(wbit, abit): - return ResNet(Bottleneck, [3,4,23,3], wbit=wbit, abit=abit) - -def ResNet152(wbit, abit): - return ResNet(Bottleneck, [3,8,36,3], wbit=wbit, abit=abit) - - +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .dorefa_layers import DOREFAConv2d as Conv +from .dorefa_layers import DOREFALinear as Linear + +__all__ = ['dorefa_resnet18'] + + +def conv3x3(in_planes, out_planes, wbit, abit, stride=1): + """3x3 convolution with padding""" + return Conv(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False, nbit_w=wbit, nbit_a=abit) + + +def conv1x1(in_planes, out_planes, wbit, abit, stride=1): + """1x1 convolution""" + return Conv(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, nbit_w=wbit, nbit_a=abit) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, wbit, abit, sparsity_list, stride=1): + super(BasicBlock, self).__init__() + + self.bb = nn.Sequential( + conv3x3(in_planes, planes, wbit=wbit, abit=abit, stride=stride), + nn.BatchNorm2d(planes), + nn.ReLU(inplace=True), + conv3x3(planes, planes, wbit=wbit, abit=abit, stride=1), + nn.BatchNorm2d(planes), + ) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + conv1x1(in_planes, self.expansion*planes, wbit=wbit, abit=abit, stride=stride), + nn.BatchNorm2d(self.expansion*planes,sparsity_list) + ) + + def forward(self, x): + out = self.bb(x) + out += self.shortcut(x) + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, in_planes, planes, wbit, abit, stride=1): + super(Bottleneck, self).__init__() + self.conv1 = conv1x1(in_planes, planes, wbit=wbit, abit=abit, stride=1) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = conv3x3(planes, planes, wbit=wbit, abit=abit, stride=stride) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = conv1x1(planes, self.expansion*planes,wbit=wbit, abit=abit, stride=1) + self.bn3 = nn.BatchNorm2d(self.expansion*planes) + + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion*planes: + self.shortcut = nn.Sequential( + conv1x1(in_planes, self.expansion*planes,wbit=wbit,abit=abit,stride=stride), + nn.BatchNorm2d(self.expansion*planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + out += self.shortcut(x) + out = F.relu(out) + return out + + + + + + +class ResNet(nn.Module): + def __init__(self, block, num_blocks, wbit=1, abit=1, num_classes=10): + super(ResNet, self).__init__() + self.in_planes = 64 + + self.head = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + ) + + self.layer1 = self._make_layer(block, 64, num_blocks[0], wbit=wbit, abit=abit, stride=1) + self.layer2 = self._make_layer(block, 128, num_blocks[1], wbit=wbit, abit=abit, stride=2) + self.layer3 = self._make_layer(block, 256, num_blocks[2], wbit=wbit, abit=abit, stride=2) + self.layer4 = self._make_layer(block, 512, num_blocks[3], wbit=wbit, abit=abit, stride=2) + + self.tail = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + nn.Flatten(), + nn.Linear(512*block.expansion, num_classes), + ) + + def init_w(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + return + + def _make_layer(self, block, planes, num_blocks, wbit, abit, stride): + strides = [stride] + [1]*(num_blocks-1) + layers = [] + for stride in strides: + layers.append(block(self.in_planes, planes, wbit, abit, stride)) + self.in_planes = planes * block.expansion + return nn.Sequential(*layers) + + def forward(self, x): + + out = self.head(x) + out = self.layer1(out) + out = self.layer2(out) + out = self.layer3(out) + out = self.layer4(out) + out = self.tail(out) + return out + + + + +def dorefa_resnet18(wbit=1, abit=1): + return ResNet(BasicBlock, [2,2,2,2], wbit=wbit, abit=abit) + +def ResNet34(wbit, abit): + return ResNet(BasicBlock, [3,4,6,3], wbit=wbit, abit=abit) + +def ResNet50(wbit, abit): + return ResNet(Bottleneck, [3,4,6,3], wbit=wbit, abit=abit) + +def ResNet101(wbit, abit): + return ResNet(Bottleneck, [3,4,23,3], wbit=wbit, abit=abit) + +def ResNet152(wbit, abit): + return ResNet(Bottleneck, [3,8,36,3], wbit=wbit, abit=abit) + + diff --git a/src/models/NN/Type_DM/BinaryNet/models/xnor_layers.py b/src/models/NN/Type_DM/BinaryNet/models/xnor_layers.py index 203f100..1318c2b 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/xnor_layers.py +++ b/src/models/NN/Type_DM/BinaryNet/models/xnor_layers.py @@ -1,145 +1,145 @@ -from torch import zeros -from torch.autograd import Function -from torch.nn import Parameter, Module, Conv2d, Linear, BatchNorm1d, BatchNorm2d, Dropout, ReLU - - -__all__ = ['XNORConv2d', 'XNORLinear', 'BNConvReLU','BNLinearReLU'] - - -class BinActive(Function): - @staticmethod - def forward(ctx, input): - ctx.save_for_backward(input) - input = input.sign() - return input - - @staticmethod - def backward(ctx, grad_output): - input, = ctx.saved_tensors - grad_input = grad_output.clone() - grad_input[input.ge(1)] = 0 - grad_input[input.le(-1)] = 0 - return grad_input - - -class XNORConv2d(Module): - def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): - super(XNORConv2d, self).__init__() - - self.in_channels = in_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.stride = stride - self.padding = padding - self.groups = groups - - self.conv = Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = kernel_size, stride = stride, padding = padding, groups = groups) - self.conv.weight.data.normal_(0, 0.05) - self.conv.bias.data.zero_() - - self.fp_weights = Parameter(zeros(self.conv.weight.size())) - self.fp_weights.data.copy_(self.conv.weight.data) - - def forward(self, x): - - self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) - self.fp_weights.data.clamp_(-1, 1) - self.mean_val = self.fp_weights.abs().view(self.out_channels, -1).mean(1, keepdim=True) - - self.conv.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1, 1, 1)) - x = self.conv(x) - - return x - - def update_gradient(self): - proxy = self.fp_weights.abs().sign() - proxy[self.fp_weights.data.abs()>1] = 0 - binary_grad = self.conv.weight.grad * self.mean_val.view(-1, 1, 1, 1) * proxy - - mean_grad = self.conv.weight.data.sign() * self.conv.weight.grad - mean_grad = mean_grad.view(self.out_channels, -1).mean(1).view(-1, 1, 1, 1) - mean_grad = mean_grad * self.conv.weight.data.sign() - - self.fp_weights.grad = binary_grad + mean_grad - self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) - -class BNConvReLU(Module): - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): - super(BNConvReLU, self).__init__() - self.dropout = dropout_ratio - self.a_active = BinActive.apply - - self.bn = BatchNorm2d(in_channels, eps=1e-4, momentum=0.1, affine=True) - if self.dropout !=0: - self.drop = Dropout(self.dropout, inplace=True) - self.econv = XNORConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias) - self.relu = ReLU(inplace=True) - - def forward(self, x): - - x = self.bn(x) - x = self.a_active(x) - if self.dropout !=0: - x = self.drop(x) - - x = self.econv(x) - x = self.relu(x) - return x - - -class XNORLinear(Module): - def __init__(self, in_features, out_features, bias=True): - super(XNORLinear, self).__init__() - self.in_features = in_features - self.out_features = out_features - self.bias = bias - - self.linear = Linear(in_features = in_features, out_features = out_features, bias = bias) - self.fp_weights = Parameter(zeros(self.linear.weight.size())) - self.fp_weights.data.copy_(self.linear.weight.data) - - def forward(self, x): - self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) - self.fp_weights.data.clamp_(-1, 1) - - self.mean_val = self.fp_weights.abs().view(self.out_features, -1).mean(1, keepdim=True) - - self.linear.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1)) - x = self.linear(x) - return x - - def update_gradient(self): - proxy = self.fp_weights.abs().sign() - proxy[self.fp_weights.data.abs()>1] = 0 - binary_grad = self.linear.weight.grad * self.mean_val.view(-1, 1) * proxy - - mean_grad = self.linear.weight.data.sign() * self.linear.weight.grad - mean_grad = mean_grad.view(self.out_features, -1).mean(1).view(-1, 1) - mean_grad = mean_grad * self.linear.weight.data.sign() - - self.fp_weights.grad = binary_grad + mean_grad - self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) - return - -class BNLinearReLU(Module): - def __init__(self, in_channels, out_channels, bias=True, dropout_ratio=0): - super(BNLinearReLU, self).__init__() - self.dropout = dropout_ratio - self.a_active = BinActive.apply - - self.bn = BatchNorm1d(in_channels, eps=1e-4, momentum=0.1, affine=True) - if self.dropout !=0: - self.drop = Dropout(self.dropout, inplace=True) - self.fc = XNORLinear(in_channels, out_channels, bias=bias) - self.relu = ReLU(inplace=True) - - def forward(self, x): - - x = self.bn(x) - x = self.a_active(x) - if self.dropout !=0: - x = self.drop(x) - - x = self.fc(x) - x = self.relu(x) - return x +from torch import zeros +from torch.autograd import Function +from torch.nn import Parameter, Module, Conv2d, Linear, BatchNorm1d, BatchNorm2d, Dropout, ReLU + + +__all__ = ['XNORConv2d', 'XNORLinear', 'BNConvReLU','BNLinearReLU'] + + +class BinActive(Function): + @staticmethod + def forward(ctx, input): + ctx.save_for_backward(input) + input = input.sign() + return input + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + grad_input = grad_output.clone() + grad_input[input.ge(1)] = 0 + grad_input[input.le(-1)] = 0 + return grad_input + + +class XNORConv2d(Module): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): + super(XNORConv2d, self).__init__() + + self.in_channels = in_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.stride = stride + self.padding = padding + self.groups = groups + + self.conv = Conv2d(in_channels = in_channels, out_channels = out_channels, kernel_size = kernel_size, stride = stride, padding = padding, groups = groups) + self.conv.weight.data.normal_(0, 0.05) + self.conv.bias.data.zero_() + + self.fp_weights = Parameter(zeros(self.conv.weight.size())) + self.fp_weights.data.copy_(self.conv.weight.data) + + def forward(self, x): + + self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) + self.fp_weights.data.clamp_(-1, 1) + self.mean_val = self.fp_weights.abs().view(self.out_channels, -1).mean(1, keepdim=True) + + self.conv.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1, 1, 1)) + x = self.conv(x) + + return x + + def update_gradient(self): + proxy = self.fp_weights.abs().sign() + proxy[self.fp_weights.data.abs()>1] = 0 + binary_grad = self.conv.weight.grad * self.mean_val.view(-1, 1, 1, 1) * proxy + + mean_grad = self.conv.weight.data.sign() * self.conv.weight.grad + mean_grad = mean_grad.view(self.out_channels, -1).mean(1).view(-1, 1, 1, 1) + mean_grad = mean_grad * self.conv.weight.data.sign() + + self.fp_weights.grad = binary_grad + mean_grad + self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) + +class BNConvReLU(Module): + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, bias=True, dropout_ratio=0): + super(BNConvReLU, self).__init__() + self.dropout = dropout_ratio + self.a_active = BinActive.apply + + self.bn = BatchNorm2d(in_channels, eps=1e-4, momentum=0.1, affine=True) + if self.dropout !=0: + self.drop = Dropout(self.dropout, inplace=True) + self.econv = XNORConv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=bias) + self.relu = ReLU(inplace=True) + + def forward(self, x): + + x = self.bn(x) + x = self.a_active(x) + if self.dropout !=0: + x = self.drop(x) + + x = self.econv(x) + x = self.relu(x) + return x + + +class XNORLinear(Module): + def __init__(self, in_features, out_features, bias=True): + super(XNORLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.bias = bias + + self.linear = Linear(in_features = in_features, out_features = out_features, bias = bias) + self.fp_weights = Parameter(zeros(self.linear.weight.size())) + self.fp_weights.data.copy_(self.linear.weight.data) + + def forward(self, x): + self.fp_weights.data = self.fp_weights.data - self.fp_weights.data.mean(1, keepdim = True) + self.fp_weights.data.clamp_(-1, 1) + + self.mean_val = self.fp_weights.abs().view(self.out_features, -1).mean(1, keepdim=True) + + self.linear.weight.data.copy_(self.fp_weights.data.sign() * self.mean_val.view(-1, 1)) + x = self.linear(x) + return x + + def update_gradient(self): + proxy = self.fp_weights.abs().sign() + proxy[self.fp_weights.data.abs()>1] = 0 + binary_grad = self.linear.weight.grad * self.mean_val.view(-1, 1) * proxy + + mean_grad = self.linear.weight.data.sign() * self.linear.weight.grad + mean_grad = mean_grad.view(self.out_features, -1).mean(1).view(-1, 1) + mean_grad = mean_grad * self.linear.weight.data.sign() + + self.fp_weights.grad = binary_grad + mean_grad + self.fp_weights.grad = self.fp_weights.grad * self.fp_weights.data[0].nelement() * (1-1/self.fp_weights.data.size(1)) + return + +class BNLinearReLU(Module): + def __init__(self, in_channels, out_channels, bias=True, dropout_ratio=0): + super(BNLinearReLU, self).__init__() + self.dropout = dropout_ratio + self.a_active = BinActive.apply + + self.bn = BatchNorm1d(in_channels, eps=1e-4, momentum=0.1, affine=True) + if self.dropout !=0: + self.drop = Dropout(self.dropout, inplace=True) + self.fc = XNORLinear(in_channels, out_channels, bias=bias) + self.relu = ReLU(inplace=True) + + def forward(self, x): + + x = self.bn(x) + x = self.a_active(x) + if self.dropout !=0: + x = self.drop(x) + + x = self.fc(x) + x = self.relu(x) + return x diff --git a/src/models/NN/Type_DM/BinaryNet/models/xnor_lenet.py b/src/models/NN/Type_DM/BinaryNet/models/xnor_lenet.py index 444c1f9..ba2031b 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/xnor_lenet.py +++ b/src/models/NN/Type_DM/BinaryNet/models/xnor_lenet.py @@ -1,45 +1,45 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['lenet5'] - -class LeNet5(nn.Module): - def __init__(self, out_classes = 10): - super(LeNet5, self).__init__() - self.features = nn.Sequential( - nn.Conv2d(1, 20, kernel_size=5, stride=1), - nn.BatchNorm2d(20, eps=1e-4, momentum=0.1, affine=False), - nn.ReLU(inplace=True), - nn.MaxPool2d(kernel_size=2, stride=2), - XNORConv2d(20, 50, kernel_size=5, stride=1, padding=0), - nn.MaxPool2d(kernel_size=2, stride=2), - nn.Flatten(), - ) - self.classifier = nn.Sequential( - BNLinearReLU(800, 500), - nn.BatchNorm1d(500, eps=1e-4, momentum=0.1, affine=False), - nn.Linear(500, out_classes), - ) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.features(x) - x = self.classifier(x) - return x - -def lenet5(out_classes=10): - return LeNet5(out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['lenet5'] + +class LeNet5(nn.Module): + def __init__(self, out_classes = 10): + super(LeNet5, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(1, 20, kernel_size=5, stride=1), + nn.BatchNorm2d(20, eps=1e-4, momentum=0.1, affine=False), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2, stride=2), + XNORConv2d(20, 50, kernel_size=5, stride=1, padding=0), + nn.MaxPool2d(kernel_size=2, stride=2), + nn.Flatten(), + ) + self.classifier = nn.Sequential( + BNLinearReLU(800, 500), + nn.BatchNorm1d(500, eps=1e-4, momentum=0.1, affine=False), + nn.Linear(500, out_classes), + ) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.features(x) + x = self.classifier(x) + return x + +def lenet5(out_classes=10): + return LeNet5(out_classes) diff --git a/src/models/NN/Type_DM/BinaryNet/models/xnor_mlp.py b/src/models/NN/Type_DM/BinaryNet/models/xnor_mlp.py index b8768e6..b4dc26a 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/xnor_mlp.py +++ b/src/models/NN/Type_DM/BinaryNet/models/xnor_mlp.py @@ -1,44 +1,44 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['mlp'] - -class MLP(nn.Module): - def __init__(self, input_size, num_hidden_nodes, num_layers, out_classes ): - super(MLP, self).__init__() - self.num_layers = num_layers - self.classifier = nn.Sequential() - for l in range(num_layers): - if l==0: - self.classifier.add_module('layer'+str(l)+'_flatten', nn.Flatten()) - self.classifier.add_module('layer'+str(l), nn.Linear(input_size, num_hidden_nodes[l])) - self.classifier.add_module('layer'+str(l)+'_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) - self.classifier.add_module('layer'+str(l)+'_activate', nn.ReLU(inplace=True)) - elif l+1 == num_layers: - self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], out_classes)) - else: - self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], num_hidden_nodes[l])) - self.classifier.add_module('layer' + str(l) + '_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) - self.classifier.add_module('layer' + str(l) + '_activate', nn.ReLU(inplace=True)) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.classifier(x) - return x - -def mlp(input_size, num_hidden_nodes, num_layers, out_classes): - return MLP(input_size, num_hidden_nodes, num_layers, out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['mlp'] + +class MLP(nn.Module): + def __init__(self, input_size, num_hidden_nodes, num_layers, out_classes ): + super(MLP, self).__init__() + self.num_layers = num_layers + self.classifier = nn.Sequential() + for l in range(num_layers): + if l==0: + self.classifier.add_module('layer'+str(l)+'_flatten', nn.Flatten()) + self.classifier.add_module('layer'+str(l), nn.Linear(input_size, num_hidden_nodes[l])) + self.classifier.add_module('layer'+str(l)+'_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) + self.classifier.add_module('layer'+str(l)+'_activate', nn.ReLU(inplace=True)) + elif l+1 == num_layers: + self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], out_classes)) + else: + self.classifier.add_module('layer'+str(l), nn.Linear(num_hidden_nodes[l-1], num_hidden_nodes[l])) + self.classifier.add_module('layer' + str(l) + '_normal', nn.BatchNorm1d(num_hidden_nodes[l], eps=1e-4, momentum=0.1, affine=False)) + self.classifier.add_module('layer' + str(l) + '_activate', nn.ReLU(inplace=True)) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.classifier(x) + return x + +def mlp(input_size, num_hidden_nodes, num_layers, out_classes): + return MLP(input_size, num_hidden_nodes, num_layers, out_classes) diff --git a/src/models/NN/Type_DM/BinaryNet/models/xnor_nin.py b/src/models/NN/Type_DM/BinaryNet/models/xnor_nin.py index 146cab5..952ca07 100755 --- a/src/models/NN/Type_DM/BinaryNet/models/xnor_nin.py +++ b/src/models/NN/Type_DM/BinaryNet/models/xnor_nin.py @@ -1,55 +1,55 @@ -import torch.nn as nn -from .xnor_layers import * - -__all__ = ['nin'] - -class NIN(nn.Module): - def __init__(self, out_class=10): - super(NIN, self).__init__() - - - self.features = nn.Sequential( - nn.Conv2d(3, 192, kernel_size = 5, stride = 1, padding = 2), - nn.BatchNorm2d(192, eps=1e-4, momentum = 0.1, affine = False), - nn.ReLU(inplace=True), - - BNConvReLU(192, 160, kernel_size=1, stride=1, padding=0), - BNConvReLU(160, 96, kernel_size=1, stride=1, padding=0), - nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), - - BNConvReLU(96, 192, kernel_size=5, stride=1, padding=2, dropout_ratio=0.5), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - nn.AvgPool2d(kernel_size = 3, stride = 2, padding = 1), - - BNConvReLU(192, 192, kernel_size=3, stride=1, padding=1, dropout_ratio=0.5), - BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), - - nn.BatchNorm2d(192, eps = 1e-4, momentum = 0.1, affine = False), - nn.Conv2d(192, out_class, kernel_size = 1, stride = 1, padding = 0), - nn.ReLU(inplace=True), - nn.AdaptiveAvgPool2d(1), - nn.Flatten() - ) - - def init_w(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.zero_().add_(1.0) - return - - def norm_bn(self): - for m in self.modules(): - if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): - if hasattr(m.weight, 'data'): - m.weight.data.clamp_(min = 0.01) - return - - def forward(self, x): - self.norm_bn() - x = self.features(x) - return x - -def nin(out_classes=10): - return NIN(out_classes) +import torch.nn as nn +from .xnor_layers import * + +__all__ = ['nin'] + +class NIN(nn.Module): + def __init__(self, out_class=10): + super(NIN, self).__init__() + + + self.features = nn.Sequential( + nn.Conv2d(3, 192, kernel_size = 5, stride = 1, padding = 2), + nn.BatchNorm2d(192, eps=1e-4, momentum = 0.1, affine = False), + nn.ReLU(inplace=True), + + BNConvReLU(192, 160, kernel_size=1, stride=1, padding=0), + BNConvReLU(160, 96, kernel_size=1, stride=1, padding=0), + nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), + + BNConvReLU(96, 192, kernel_size=5, stride=1, padding=2, dropout_ratio=0.5), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + nn.AvgPool2d(kernel_size = 3, stride = 2, padding = 1), + + BNConvReLU(192, 192, kernel_size=3, stride=1, padding=1, dropout_ratio=0.5), + BNConvReLU(192, 192, kernel_size=1, stride=1, padding=0), + + nn.BatchNorm2d(192, eps = 1e-4, momentum = 0.1, affine = False), + nn.Conv2d(192, out_class, kernel_size = 1, stride = 1, padding = 0), + nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d(1), + nn.Flatten() + ) + + def init_w(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.zero_().add_(1.0) + return + + def norm_bn(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): + if hasattr(m.weight, 'data'): + m.weight.data.clamp_(min = 0.01) + return + + def forward(self, x): + self.norm_bn() + x = self.features(x) + return x + +def nin(out_classes=10): + return NIN(out_classes) diff --git a/src/models/NN/Type_DM/BinaryNet/requirements.txt b/src/models/NN/Type_DM/BinaryNet/requirements.txt index dc3af0c..40ebca7 100755 --- a/src/models/NN/Type_DM/BinaryNet/requirements.txt +++ b/src/models/NN/Type_DM/BinaryNet/requirements.txt @@ -1,5 +1,5 @@ -torch -torchvision -tqdm -pyyaml - +torch +torchvision +tqdm +pyyaml + diff --git a/src/models/NN/Type_DM/BinaryNet/yml/bnn_caffenet_cifar10.yml b/src/models/NN/Type_DM/BinaryNet/yml/bnn_caffenet_cifar10.yml index 99fab9d..2c352c2 100755 --- a/src/models/NN/Type_DM/BinaryNet/yml/bnn_caffenet_cifar10.yml +++ b/src/models/NN/Type_DM/BinaryNet/yml/bnn_caffenet_cifar10.yml @@ -1,17 +1,17 @@ -no_cuda: False -checkpoint: "results/bnn_caffenet_cifar10" -filename: null -pretrained: null -bin_type: 'bnn' - -model : "bnn_caffenet" -save_path: "results/bnn_caffenet_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'sgd' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/bnn_caffenet_cifar10" +filename: null +pretrained: null +bin_type: 'bnn' + +model : "bnn_caffenet" +save_path: "results/bnn_caffenet_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'sgd' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_DM/BinaryNet/yml/dorefa_resnet_cifar10.yml b/src/models/NN/Type_DM/BinaryNet/yml/dorefa_resnet_cifar10.yml index dffecfb..29f5c73 100755 --- a/src/models/NN/Type_DM/BinaryNet/yml/dorefa_resnet_cifar10.yml +++ b/src/models/NN/Type_DM/BinaryNet/yml/dorefa_resnet_cifar10.yml @@ -1,18 +1,18 @@ -no_cuda: False -checkpoint: "results/dorefa_resnet_cifar10" -filename: null -pretrained: null - -bin_type: 'dorefa' - -model : "dorefa_resnet18" -save_path: "results/dorefa_resnet_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'sgd' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/dorefa_resnet_cifar10" +filename: null +pretrained: null + +bin_type: 'dorefa' + +model : "dorefa_resnet18" +save_path: "results/dorefa_resnet_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'sgd' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_DM/BinaryNet/yml/lenet_mnist.yml b/src/models/NN/Type_DM/BinaryNet/yml/lenet_mnist.yml index 9226466..5eb5c0b 100755 --- a/src/models/NN/Type_DM/BinaryNet/yml/lenet_mnist.yml +++ b/src/models/NN/Type_DM/BinaryNet/yml/lenet_mnist.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/lenet_mnist" -filename: null -pretrained: null -bin_type: "xnor" -model : "lenet5" -save_path: "results/lenet_mnist" -dataset : "mnist" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [100, 200] -epochs: 300 - +no_cuda: False +checkpoint: "results/lenet_mnist" +filename: null +pretrained: null +bin_type: "xnor" +model : "lenet5" +save_path: "results/lenet_mnist" +dataset : "mnist" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [100, 200] +epochs: 300 + diff --git a/src/models/NN/Type_DM/BinaryNet/yml/mlp_mnist.yml b/src/models/NN/Type_DM/BinaryNet/yml/mlp_mnist.yml index 98511c9..94afe32 100755 --- a/src/models/NN/Type_DM/BinaryNet/yml/mlp_mnist.yml +++ b/src/models/NN/Type_DM/BinaryNet/yml/mlp_mnist.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/mlp_mnist" -filename: null -pretrained: null -bin_type: "xnor" -model : "mlp" -save_path: "results/mlp_mnist" -dataset : "mnist" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [100, 200] -epochs: 300 - +no_cuda: False +checkpoint: "results/mlp_mnist" +filename: null +pretrained: null +bin_type: "xnor" +model : "mlp" +save_path: "results/mlp_mnist" +dataset : "mnist" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [100, 200] +epochs: 300 + diff --git a/src/models/NN/Type_DM/BinaryNet/yml/nin_cifar10.yml b/src/models/NN/Type_DM/BinaryNet/yml/nin_cifar10.yml index dae28c6..c45127c 100755 --- a/src/models/NN/Type_DM/BinaryNet/yml/nin_cifar10.yml +++ b/src/models/NN/Type_DM/BinaryNet/yml/nin_cifar10.yml @@ -1,16 +1,16 @@ -no_cuda: False -checkpoint: "results/nin_cifar10" -filename: null -pretrained: null -bin_type: 'xnor' -model : "nin" -save_path: "results/nin_cifar10" -dataset : "cifar10" -batch_size: 128 -test_batch_size: 100 -optimizer: 'adam' -lr: 0.01 -gamma: 0.1 -steps: [80, 150] -epochs: 300 - +no_cuda: False +checkpoint: "results/nin_cifar10" +filename: null +pretrained: null +bin_type: 'xnor' +model : "nin" +save_path: "results/nin_cifar10" +dataset : "cifar10" +batch_size: 128 +test_batch_size: 100 +optimizer: 'adam' +lr: 0.01 +gamma: 0.1 +steps: [80, 150] +epochs: 300 + diff --git a/src/models/NN/Type_DM/dedicated_p4.py b/src/models/NN/Type_DM/dedicated_p4.py index f68552c..d422cac 100755 --- a/src/models/NN/Type_DM/dedicated_p4.py +++ b/src/models/NN/Type_DM/dedicated_p4.py @@ -1,316 +1,316 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - -def write_compare(c_n, con_list, num_class, txt): - if c_n == num_class-1: - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) - +" = meta.middle_c" +str(int(compare[0])) +" - meta.middle_c"+str(int(compare[1]))+";\n") - - c_n += 1 - write_compare(c_n, con_list, num_class, txt) - c_n -= 1 - - return - - -def do_compare(c_n, con_list, num_class, txt, label, config): - if c_n == num_class-1: - txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(int(label))+";\n" - " "+(c_n-1)*" "+"}\n") - return - else: - for con in ['if','else']: - con_list[c_n] = con - compare = [0,0] - for d in range(c_n): - if con_list[d] == 'if': - compare[0] = d+1 - compare[1] = c_n+1 - if con == 'if': - label = compare[1] - # print(con_list, c_n) - txt.write(" "+c_n*" "+con+"(meta.compare" - +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" - +(10-1)*"0"+"!=0){\n") #<0 - else: - label = compare[0] - txt.write(" "+c_n*" "+con + "{\n") - c_n += 1 - do_compare(c_n, con_list, num_class, txt, label, config) - c_n -= 1 - if con == 'else' and c_n != 0: - txt.write(" " + (c_n-1) * " " + "}\n") - return - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_hidden_nodes'] = config_file['num hidden nodes'] - config['num_layers'] = config_file["number of layers"] - config['num_classes'] = config_file["number of classes"] - config['model'] = config_file['model'] - config['width'] = config_file["width of inputs"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for c in range(0, config['num_classes']): - headers.write(" bit<" + str(10) + "> middle_c" + str(c) + ";\n") - - for c in range(config['num_classes']): - for c1 in range(c + 1, config['num_classes']): - headers.write(" bit<" + str(10) + "> compare" + str(c) + "_" + str(c1) + ";\n") - - headers.write(" bit<64> bnnInput;\n" - " bit<64> XNOROutput;\n" - " bit<64> NextLayerInput;\n" - " bit<1> activated;\n" - " bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - ingress.write(" meta.bnnInput = 0;\n" - " meta.XNOROutput = 0;\n" - " meta.NextLayerInput = 0;\n" - " BuildInput();\n\n") - - count = 0 - for l in range(config['num_layers']): - ingress.write(" Layer"+str(l)+"_Process("+str(int(count))+");\n") - if l + 1 != config['num_layers']: - ingress.write(" meta.bnnInput = meta.NextLayerInput;\n" - " meta.NextLayerInput = 0;\n") - ingress.write("\n") - if l+1== config['num_layers']: - break - count+=config['num_hidden_nodes'][l] - - ingress.write(" compare();\n") - do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) - - # ingress.write(" \n" - # " bit<48> tmp;\n" - # " /* Swap the MAC addresses */\n" - # " tmp = hdr.ethernet.dstAddr;\n" - # " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" - # " hdr.ethernet.srcAddr = tmp;\n" - # # " bit < 64 > weight = 0;\n" - # # " weights.read( weight, 0); \n" - # # " hdr.Planter.result = (bit<32>)meta.bnnInput;\n" - # # " hdr.Planter.result = debug;\n" - # # " hdr.Planter.result = (bit<32>)meta.middle_c2;\n" - # " send(ig_intr_md.ingress_port);\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - - - ingress.write(" register>(1024) weights;\n" - # " //bit<8> count = 0;\n" - # " bit<1> activated = 0;\n" - " bit<128> m1 = 0x55555555555555555555555555555555;\n" - " bit<128> m2 = 0x33333333333333333333333333333333;\n" - " bit<128> m4 = 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f;\n" - " bit<128> m8 = 0x00ff00ff00ff00ff00ff00ff00ff00ff;\n" - " bit<128> m16= 0x0000ffff0000ffff0000ffff0000ffff;\n" - " bit<128> m32= 0x00000000ffffffff00000000ffffffff;\n" - " bit<128> m64= 0x0000000000000000ffffffffffffffff;\n\n") - - - - ingress.write(" action XNOR(bit<64> weight){\n" - " meta.XNOROutput = weight^meta.bnnInput;\n" - " meta.XNOROutput = ~meta.XNOROutput;\n" - " }\n\n") - - for l in range(config['num_layers']): - if l+1 == config['num_layers']: - break - if l==0: - threshold = np.sum(config['width']) / 2 - else: - threshold = config['num_hidden_nodes'][l-1] / 2 - ingress.write(" action BitCount_l"+str(l)+"(bit<64> bitInput){\n" - " bit<128> x= (bit<128>)bitInput;\n" - " x = (x & m1 ) + ((x >> 1) & m1 );\n" - " x = (x & m2 ) + ((x >> 2) & m2 );\n" - " x = (x & m4 ) + ((x >> 4) & m4 );\n" - " x = (x & m8 ) + ((x >> 8) & m8 );\n" - " x = (x & m16) + ((x >> 16) & m16);\n" - " x = (x & m32) + ((x >> 32) & m32);\n" - " x = (x & m64) + ((x >> 64) & m64);\n" - " meta.activated = (x>"+str(np.int(np.floor(threshold)))+") ? (bit<1>)1 : 0;\n" - " meta.NextLayerInput = meta.NextLayerInput<<1;\n" - " meta.NextLayerInput = meta.NextLayerInput + (bit<64>)meta.activated;\n" - " }\n\n") - - for c in range(config['num_classes']): - ingress.write(" action BitCount_c"+str(c)+"(bit<64> bitInput){\n" - " bit<128> x= (bit<128>)bitInput;\n" - " x = (x & m1 ) + ((x >> 1) & m1 );\n" - " x = (x & m2 ) + ((x >> 2) & m2 );\n" - " x = (x & m4 ) + ((x >> 4) & m4 );\n" - " x = (x & m8 ) + ((x >> 8) & m8 );\n" - " x = (x & m16) + ((x >> 16) & m16);\n" - " x = (x & m32) + ((x >> 32) & m32);\n" - " x = (x & m64) + ((x >> 64) & m64);\n" - " meta.middle_c"+str(c)+" = (bit<10>) x;\n" - " }\n\n") - - for l in range(config['num_layers']): - if l == 0: - num_zeros = np.int(128 - np.sum(config['width'])) - num_ones = np.int(np.sum(config['width'])) - bound = int("0b"+"0"*num_zeros+"1"*num_ones,2) - else: - num_zeros = np.int(128 - config['num_hidden_nodes'][l - 1]) - num_ones = np.int(config['num_hidden_nodes'][l - 1]) - bound = int("0b" + "0" * num_zeros + "1" * num_ones, 2) - if l==0: - ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" - # " bit < "+str(np.int(np.sum(config['width'])))+" > weight = 0;\n" - " bit <64> weight = 0;\n") - # " meta.NextLayerInput = 0;\n") - for h in range(config['num_hidden_nodes'][l]): - ingress.write(" weights.read( weight, (bit<32>)offset+"+str(h)+");\n" - " XNOR(weight);\n" - " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" - " BitCount_l"+str(l)+"(meta.XNOROutput);\n") - ingress.write(" }\n\n") - elif l+1==config['num_layers']: - ingress.write(" action Layer" + str(l) + "_Process(bit <10> offset){ \n" - " bit <64> weight = 0;\n") - # " meta.NextLayerInput = 0;\n") - for c in range(config['num_classes']): - ingress.write(" weights.read( weight, (bit<32>)offset+" + str(c) + ");\n" - " XNOR(weight);\n" - " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" - " BitCount_c"+str(c)+"(meta.XNOROutput);\n") - ingress.write(" }\n\n") - else: - ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" - " bit <64> weight = 0;\n" - " meta.NextLayerInput = 0;\n") - for h in range(config['num_hidden_nodes'][l]): - ingress.write(" weights.read(weight, (bit<32>)offset+"+str(h)+");\n" - " XNOR(weight);\n" - " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" - " BitCount_l"+str(l)+"(meta.XNOROutput);\n") - ingress.write(" }\n\n") - - ingress.write(" action compare(){\n") - write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) - ingress.write(" }\n\n") - - ingress.write(" action BuildInput(){\n") - for f in range(config['num_features']): - if f+1) hdr.Planter.feature"+str(f)+") << "+str(np.int(config['width'][f+1]))+";\n") - else: - ingress.write(" meta.bnnInput = (meta.bnnInput + (bit <64>) hdr.Planter.feature" + str(f) + ") ;\n") - - ingress.write(" }\n\n") - - ################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for idx in range(len(Exact_Table['weights'])): - file.write("register_write SwitchIngress.weights "+str(idx)+" "+str(Exact_Table['weights'][idx])+"\n") - - - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + +def write_compare(c_n, con_list, num_class, txt): + if c_n == num_class-1: + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + txt.write(" meta.compare" +str(int(compare[0])) +"_"+str(int(compare[1])) + +" = meta.middle_c" +str(int(compare[0])) +" - meta.middle_c"+str(int(compare[1]))+";\n") + + c_n += 1 + write_compare(c_n, con_list, num_class, txt) + c_n -= 1 + + return + + +def do_compare(c_n, con_list, num_class, txt, label, config): + if c_n == num_class-1: + txt.write(" "+c_n*" "+"hdr.Planter.result = "+str(int(label))+";\n" + " "+(c_n-1)*" "+"}\n") + return + else: + for con in ['if','else']: + con_list[c_n] = con + compare = [0,0] + for d in range(c_n): + if con_list[d] == 'if': + compare[0] = d+1 + compare[1] = c_n+1 + if con == 'if': + label = compare[1] + # print(con_list, c_n) + txt.write(" "+c_n*" "+con+"(meta.compare" + +str(int(compare[0]))+"_"+str(int(compare[1]))+"& 0b1" + +(10-1)*"0"+"!=0){\n") #<0 + else: + label = compare[0] + txt.write(" "+c_n*" "+con + "{\n") + c_n += 1 + do_compare(c_n, con_list, num_class, txt, label, config) + c_n -= 1 + if con == 'else' and c_n != 0: + txt.write(" " + (c_n-1) * " " + "}\n") + return + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_hidden_nodes'] = config_file['num hidden nodes'] + config['num_layers'] = config_file["number of layers"] + config['num_classes'] = config_file["number of classes"] + config['model'] = config_file['model'] + config['width'] = config_file["width of inputs"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for c in range(0, config['num_classes']): + headers.write(" bit<" + str(10) + "> middle_c" + str(c) + ";\n") + + for c in range(config['num_classes']): + for c1 in range(c + 1, config['num_classes']): + headers.write(" bit<" + str(10) + "> compare" + str(c) + "_" + str(c1) + ";\n") + + headers.write(" bit<64> bnnInput;\n" + " bit<64> XNOROutput;\n" + " bit<64> NextLayerInput;\n" + " bit<1> activated;\n" + " bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + ingress.write(" meta.bnnInput = 0;\n" + " meta.XNOROutput = 0;\n" + " meta.NextLayerInput = 0;\n" + " BuildInput();\n\n") + + count = 0 + for l in range(config['num_layers']): + ingress.write(" Layer"+str(l)+"_Process("+str(int(count))+");\n") + if l + 1 != config['num_layers']: + ingress.write(" meta.bnnInput = meta.NextLayerInput;\n" + " meta.NextLayerInput = 0;\n") + ingress.write("\n") + if l+1== config['num_layers']: + break + count+=config['num_hidden_nodes'][l] + + ingress.write(" compare();\n") + do_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress, 0, config) + + # ingress.write(" \n" + # " bit<48> tmp;\n" + # " /* Swap the MAC addresses */\n" + # " tmp = hdr.ethernet.dstAddr;\n" + # " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" + # " hdr.ethernet.srcAddr = tmp;\n" + # # " bit < 64 > weight = 0;\n" + # # " weights.read( weight, 0); \n" + # # " hdr.Planter.result = (bit<32>)meta.bnnInput;\n" + # # " hdr.Planter.result = debug;\n" + # # " hdr.Planter.result = (bit<32>)meta.middle_c2;\n" + # " send(ig_intr_md.ingress_port);\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + + + ingress.write(" register>(1024) weights;\n" + # " //bit<8> count = 0;\n" + # " bit<1> activated = 0;\n" + " bit<128> m1 = 0x55555555555555555555555555555555;\n" + " bit<128> m2 = 0x33333333333333333333333333333333;\n" + " bit<128> m4 = 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f;\n" + " bit<128> m8 = 0x00ff00ff00ff00ff00ff00ff00ff00ff;\n" + " bit<128> m16= 0x0000ffff0000ffff0000ffff0000ffff;\n" + " bit<128> m32= 0x00000000ffffffff00000000ffffffff;\n" + " bit<128> m64= 0x0000000000000000ffffffffffffffff;\n\n") + + + + ingress.write(" action XNOR(bit<64> weight){\n" + " meta.XNOROutput = weight^meta.bnnInput;\n" + " meta.XNOROutput = ~meta.XNOROutput;\n" + " }\n\n") + + for l in range(config['num_layers']): + if l+1 == config['num_layers']: + break + if l==0: + threshold = np.sum(config['width']) / 2 + else: + threshold = config['num_hidden_nodes'][l-1] / 2 + ingress.write(" action BitCount_l"+str(l)+"(bit<64> bitInput){\n" + " bit<128> x= (bit<128>)bitInput;\n" + " x = (x & m1 ) + ((x >> 1) & m1 );\n" + " x = (x & m2 ) + ((x >> 2) & m2 );\n" + " x = (x & m4 ) + ((x >> 4) & m4 );\n" + " x = (x & m8 ) + ((x >> 8) & m8 );\n" + " x = (x & m16) + ((x >> 16) & m16);\n" + " x = (x & m32) + ((x >> 32) & m32);\n" + " x = (x & m64) + ((x >> 64) & m64);\n" + " meta.activated = (x>"+str(np.int(np.floor(threshold)))+") ? (bit<1>)1 : 0;\n" + " meta.NextLayerInput = meta.NextLayerInput<<1;\n" + " meta.NextLayerInput = meta.NextLayerInput + (bit<64>)meta.activated;\n" + " }\n\n") + + for c in range(config['num_classes']): + ingress.write(" action BitCount_c"+str(c)+"(bit<64> bitInput){\n" + " bit<128> x= (bit<128>)bitInput;\n" + " x = (x & m1 ) + ((x >> 1) & m1 );\n" + " x = (x & m2 ) + ((x >> 2) & m2 );\n" + " x = (x & m4 ) + ((x >> 4) & m4 );\n" + " x = (x & m8 ) + ((x >> 8) & m8 );\n" + " x = (x & m16) + ((x >> 16) & m16);\n" + " x = (x & m32) + ((x >> 32) & m32);\n" + " x = (x & m64) + ((x >> 64) & m64);\n" + " meta.middle_c"+str(c)+" = (bit<10>) x;\n" + " }\n\n") + + for l in range(config['num_layers']): + if l == 0: + num_zeros = np.int(128 - np.sum(config['width'])) + num_ones = np.int(np.sum(config['width'])) + bound = int("0b"+"0"*num_zeros+"1"*num_ones,2) + else: + num_zeros = np.int(128 - config['num_hidden_nodes'][l - 1]) + num_ones = np.int(config['num_hidden_nodes'][l - 1]) + bound = int("0b" + "0" * num_zeros + "1" * num_ones, 2) + if l==0: + ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" + # " bit < "+str(np.int(np.sum(config['width'])))+" > weight = 0;\n" + " bit <64> weight = 0;\n") + # " meta.NextLayerInput = 0;\n") + for h in range(config['num_hidden_nodes'][l]): + ingress.write(" weights.read( weight, (bit<32>)offset+"+str(h)+");\n" + " XNOR(weight);\n" + " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" + " BitCount_l"+str(l)+"(meta.XNOROutput);\n") + ingress.write(" }\n\n") + elif l+1==config['num_layers']: + ingress.write(" action Layer" + str(l) + "_Process(bit <10> offset){ \n" + " bit <64> weight = 0;\n") + # " meta.NextLayerInput = 0;\n") + for c in range(config['num_classes']): + ingress.write(" weights.read( weight, (bit<32>)offset+" + str(c) + ");\n" + " XNOR(weight);\n" + " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" + " BitCount_c"+str(c)+"(meta.XNOROutput);\n") + ingress.write(" }\n\n") + else: + ingress.write(" action Layer"+str(l)+"_Process(bit <10> offset){ \n" + " bit <64> weight = 0;\n" + " meta.NextLayerInput = 0;\n") + for h in range(config['num_hidden_nodes'][l]): + ingress.write(" weights.read(weight, (bit<32>)offset+"+str(h)+");\n" + " XNOR(weight);\n" + " meta.XNOROutput = (bit<64>)meta.XNOROutput["+str(num_ones-1)+":0];\n" + " BitCount_l"+str(l)+"(meta.XNOROutput);\n") + ingress.write(" }\n\n") + + ingress.write(" action compare(){\n") + write_compare(0, (np.ones(config['num_classes'])).tolist(), config['num_classes'], ingress) + ingress.write(" }\n\n") + + ingress.write(" action BuildInput(){\n") + for f in range(config['num_features']): + if f+1) hdr.Planter.feature"+str(f)+") << "+str(np.int(config['width'][f+1]))+";\n") + else: + ingress.write(" meta.bnnInput = (meta.bnnInput + (bit <64>) hdr.Planter.feature" + str(f) + ") ;\n") + + ingress.write(" }\n\n") + + ################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for idx in range(len(Exact_Table['weights'])): + file.write("register_write SwitchIngress.weights "+str(idx)+" "+str(Exact_Table['weights'][idx])+"\n") + + + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + diff --git a/src/models/NN/Type_DM/readme.md b/src/models/NN/Type_DM/readme.md index 955597d..0a8514a 100644 --- a/src/models/NN/Type_DM/readme.md +++ b/src/models/NN/Type_DM/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for NN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for NN. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/NN/Type_DM/table_generator.py b/src/models/NN/Type_DM/table_generator.py index efa3ff6..e3e78c9 100755 --- a/src/models/NN/Type_DM/table_generator.py +++ b/src/models/NN/Type_DM/table_generator.py @@ -1,302 +1,302 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import math - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os -from src.functions.logic_gates import * -import time - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - - -import copy -import json -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -from src.models.NN.Type_1.BinaryNet.models.xnor_layers import * -from src.models.NN.Type_1.BinaryNet.models.xnor_mlp import * -from sklearn.metrics import * -import torch.nn as nn -from torch.autograd import Variable as V -from torch.utils.data import DataLoader, Dataset, TensorDataset -import torch -from src.models.NN.Type_1.BinaryNet.models import * -from src.models.NN.Type_1.BinaryNet.classifiers.xnor_classifier import * -from src.functions.numeric_conversion import * - - -def bintoint(binary): - number = 0 - for b in binary: - number = (2 * number) + int(b) - return number - - -def convert_weight_to_register_data(weight_data): - weight = [] - weights = [] - for i in weight_data: - for j in i: - if j < 0: - weight.append(0) - else: - weight.append(1) - weights.append(bintoint(weight)) - weight.clear() - return weights - - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') - Planter_config['model config']['batch size'] = int(input('- Model batch size? (default = 10) ') or '10') - Planter_config['model config']['num epoch'] = int(input('- Number of training epoch? (default = 15) ') or '15') - Planter_config['model config']['number of layers'] = int(input('- Number of layers? (default = 3) ') or '3') - Planter_config['model config']['num hidden nodes'] = [] - for l in range(Planter_config['model config']['number of layers']-1): - Planter_config['model config']['num hidden nodes'] += [int(input('- Number of hidden nodes for layer '+str(l+1)+'? (default = 56) ') or '56')] - - num_layers = Planter_config['model config']['number of layers'] - num_hidden_nodes = Planter_config['model config']['num hidden nodes'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - learning_rate = Planter_config['model config']['learning rate'] - batch_size = Planter_config['model config']['batch size'] - num_epoch = Planter_config['model config']['num epoch'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - width = [] - for f in range(num_features): - width += [np.ceil(math.log(feature_max[f],2))] - width_row = int(np.sum(width)) - - - total_count = np.shape(train_X.values)[0] + np.shape(test_X.values)[0] - count = 0 - - train_X_new = [] - test_X_new = [] - for i in range(np.shape(train_X.values)[0]): - flag = 0 - row = (np.zeros(int(width_row))) - for f in range(num_features): - code = ten_to_bin(train_X.values[i][f],width[f]) - for d in range(int(width[f])): - row[flag] = int(code[d]) - flag += 1 - train_X_new += [row] - # ====================== - count += 1 - percent = int(np.ceil(50 * count / total_count)) - print('\rProcessing the raw Data [' + percent * '#' + (50 - percent) * '-' + '] ' + str( int(np.round(100 * count / total_count))) + "%", end="") - # ====================== - train_X_new = np.array(train_X_new) - - for i in range(np.shape(test_X.values)[0]): - flag = 0 - row = (np.zeros(int(width_row))) - for f in range(num_features): - code = ten_to_bin(test_X.values[i][f],width[f]) - for d in range(int(width[f])): - row[flag] = int(code[d]) - flag += 1 - test_X_new += [row] - # ====================== - count += 1 - percent = int(np.ceil(50 * count / total_count)) - print('\rProcessing the raw data [' + percent * '#' + (50 - percent) * '-' + '] ' + str(int(np.round(100 * count / total_count))) + "%", end="") - # ====================== - test_X_new = np.array(test_X_new) - print('\nData set is ready') - - - ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 - tensor_x = torch.from_numpy(train_X_new.astype(np.float32)) - tensor_y = torch.LongTensor(train_y.astype(np.float32)) - test_X = torch.from_numpy(test_X_new.astype(np.float32)) - test_y = torch.LongTensor(test_y.astype(np.float32)) - my_train_dataset = TensorDataset(tensor_x, tensor_y) - my_test_dataset = TensorDataset(test_X, test_y) - train_loader = DataLoader(my_train_dataset, batch_size=batch_size, shuffle=False) - test_loader = DataLoader(my_test_dataset, batch_size=batch_size, shuffle=False) - - cuda = torch.cuda.is_available() - device = torch.device('cuda' if cuda else 'cpu') - torch.manual_seed(0) - if cuda: - torch.backends.cudnn.deterministic = True - torch.cuda.manual_seed(0) - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - model = eval('mlp')(width_row, num_hidden_nodes, num_layers, num_classes) - model.to(device) - - classification = XnorClassifier(model, train_loader, test_loader, device) - - criterion = torch.nn.CrossEntropyLoss() - criterion.to(device) - - if hasattr(model, 'init_w'): - model.init_w() - - - optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) - - scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 200] , gamma=0.1) - - classification.train(criterion, optimizer, num_epoch, scheduler, Planter_config['directory config']['work']+'/src/temp/mlp') - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - Exact_Table = {} - - Exact_Table['weights'] = [] - for l in range(num_layers): - Exact_Table['weights'] += convert_weight_to_register_data(model.classifier._modules['layer'+str(l)].weight.detach().numpy()) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "NN" - Planter_config['p4 config']["num hidden nodes"] = num_hidden_nodes - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of layers"] = num_layers - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["width of inputs"] = width - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return test_y.tolist() - -def bits_on_count(x): - return sum(c=='1' for c in bin(x)) - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_hidden_nodes = Planter_config['p4 config']["num hidden nodes"] - num_layers = Planter_config['p4 config']["number of layers"] - width = Planter_config['p4 config']["width of inputs"] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - - correct = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - input = '' - for f in range(num_features): - input += ten_to_bin(test_X.values[i][f],width[f]) - input = int(input, 2) - # print(' - ', input) - node_num = 0 - for l in range(num_layers): - if l == 0: - num_bits = np.int(np.sum(width)) - else: - num_bits = np.int(num_hidden_nodes[l - 1]) - next_layer_input = '' - if l+1 != num_layers: - for n in range(num_hidden_nodes[l]): - - value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) - - value = bits_on_count(value) - - node_num += 1 - if l==0: - threshold = np.floor(np.sum(width)/2) - else: - threshold = np.floor(num_hidden_nodes[l-1]/2) - if value> threshold: - next_layer_input += '1' - else: - next_layer_input += '0' - - - input = int(next_layer_input,2) - - else: - result = np.zeros(num_classes).tolist() - for c in range(num_classes): - # value = XNOR(input, Exact_Table['weights'][node_num]) - value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) - value = bits_on_count(value) - result[c] = copy.deepcopy(value) - node_num += 1 - # print(result) - - switch_prediction = result.index(np.max(result)) - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, M/A format macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), accuracy_score(switch_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of NN is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import math + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os +from src.functions.logic_gates import * +import time + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + + +import copy +import json +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +from src.models.NN.Type_1.BinaryNet.models.xnor_layers import * +from src.models.NN.Type_1.BinaryNet.models.xnor_mlp import * +from sklearn.metrics import * +import torch.nn as nn +from torch.autograd import Variable as V +from torch.utils.data import DataLoader, Dataset, TensorDataset +import torch +from src.models.NN.Type_1.BinaryNet.models import * +from src.models.NN.Type_1.BinaryNet.classifiers.xnor_classifier import * +from src.functions.numeric_conversion import * + + +def bintoint(binary): + number = 0 + for b in binary: + number = (2 * number) + int(b) + return number + + +def convert_weight_to_register_data(weight_data): + weight = [] + weights = [] + for i in weight_data: + for j in i: + if j < 0: + weight.append(0) + else: + weight.append(1) + weights.append(bintoint(weight)) + weight.clear() + return weights + + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + Planter_config['model config']['learning rate'] = np.float(input('- Model learning rate? (default = 0.01) ') or '0.01') + Planter_config['model config']['batch size'] = int(input('- Model batch size? (default = 10) ') or '10') + Planter_config['model config']['num epoch'] = int(input('- Number of training epoch? (default = 15) ') or '15') + Planter_config['model config']['number of layers'] = int(input('- Number of layers? (default = 3) ') or '3') + Planter_config['model config']['num hidden nodes'] = [] + for l in range(Planter_config['model config']['number of layers']-1): + Planter_config['model config']['num hidden nodes'] += [int(input('- Number of hidden nodes for layer '+str(l+1)+'? (default = 56) ') or '56')] + + num_layers = Planter_config['model config']['number of layers'] + num_hidden_nodes = Planter_config['model config']['num hidden nodes'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + learning_rate = Planter_config['model config']['learning rate'] + batch_size = Planter_config['model config']['batch size'] + num_epoch = Planter_config['model config']['num epoch'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + width = [] + for f in range(num_features): + width += [np.ceil(math.log(feature_max[f],2))] + width_row = int(np.sum(width)) + + + total_count = np.shape(train_X.values)[0] + np.shape(test_X.values)[0] + count = 0 + + train_X_new = [] + test_X_new = [] + for i in range(np.shape(train_X.values)[0]): + flag = 0 + row = (np.zeros(int(width_row))) + for f in range(num_features): + code = ten_to_bin(train_X.values[i][f],width[f]) + for d in range(int(width[f])): + row[flag] = int(code[d]) + flag += 1 + train_X_new += [row] + # ====================== + count += 1 + percent = int(np.ceil(50 * count / total_count)) + print('\rProcessing the raw Data [' + percent * '#' + (50 - percent) * '-' + '] ' + str( int(np.round(100 * count / total_count))) + "%", end="") + # ====================== + train_X_new = np.array(train_X_new) + + for i in range(np.shape(test_X.values)[0]): + flag = 0 + row = (np.zeros(int(width_row))) + for f in range(num_features): + code = ten_to_bin(test_X.values[i][f],width[f]) + for d in range(int(width[f])): + row[flag] = int(code[d]) + flag += 1 + test_X_new += [row] + # ====================== + count += 1 + percent = int(np.ceil(50 * count / total_count)) + print('\rProcessing the raw data [' + percent * '#' + (50 - percent) * '-' + '] ' + str(int(np.round(100 * count / total_count))) + "%", end="") + # ====================== + test_X_new = np.array(test_X_new) + print('\nData set is ready') + + + ###### Convert input data to the dataset type accepted by the neural network, set batch size to 10 + tensor_x = torch.from_numpy(train_X_new.astype(np.float32)) + tensor_y = torch.LongTensor(train_y.astype(np.float32)) + test_X = torch.from_numpy(test_X_new.astype(np.float32)) + test_y = torch.LongTensor(test_y.astype(np.float32)) + my_train_dataset = TensorDataset(tensor_x, tensor_y) + my_test_dataset = TensorDataset(test_X, test_y) + train_loader = DataLoader(my_train_dataset, batch_size=batch_size, shuffle=False) + test_loader = DataLoader(my_test_dataset, batch_size=batch_size, shuffle=False) + + cuda = torch.cuda.is_available() + device = torch.device('cuda' if cuda else 'cpu') + torch.manual_seed(0) + if cuda: + torch.backends.cudnn.deterministic = True + torch.cuda.manual_seed(0) + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + model = eval('mlp')(width_row, num_hidden_nodes, num_layers, num_classes) + model.to(device) + + classification = XnorClassifier(model, train_loader, test_loader, device) + + criterion = torch.nn.CrossEntropyLoss() + criterion.to(device) + + if hasattr(model, 'init_w'): + model.init_w() + + + optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) + + scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [100, 200] , gamma=0.1) + + classification.train(criterion, optimizer, num_epoch, scheduler, Planter_config['directory config']['work']+'/src/temp/mlp') + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + Exact_Table = {} + + Exact_Table['weights'] = [] + for l in range(num_layers): + Exact_Table['weights'] += convert_weight_to_register_data(model.classifier._modules['layer'+str(l)].weight.detach().numpy()) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "NN" + Planter_config['p4 config']["num hidden nodes"] = num_hidden_nodes + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of layers"] = num_layers + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["width of inputs"] = width + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return test_y.tolist() + +def bits_on_count(x): + return sum(c=='1' for c in bin(x)) + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_hidden_nodes = Planter_config['p4 config']["num hidden nodes"] + num_layers = Planter_config['p4 config']["number of layers"] + width = Planter_config['p4 config']["width of inputs"] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + + correct = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + input = '' + for f in range(num_features): + input += ten_to_bin(test_X.values[i][f],width[f]) + input = int(input, 2) + # print(' - ', input) + node_num = 0 + for l in range(num_layers): + if l == 0: + num_bits = np.int(np.sum(width)) + else: + num_bits = np.int(num_hidden_nodes[l - 1]) + next_layer_input = '' + if l+1 != num_layers: + for n in range(num_hidden_nodes[l]): + + value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) + + value = bits_on_count(value) + + node_num += 1 + if l==0: + threshold = np.floor(np.sum(width)/2) + else: + threshold = np.floor(num_hidden_nodes[l-1]/2) + if value> threshold: + next_layer_input += '1' + else: + next_layer_input += '0' + + + input = int(next_layer_input,2) + + else: + result = np.zeros(num_classes).tolist() + for c in range(num_classes): + # value = XNOR(input, Exact_Table['weights'][node_num]) + value = XNOR_with_bits(input, Exact_Table['weights'][node_num], num_bits) + value = bits_on_count(value) + result[c] = copy.deepcopy(value) + node_num += 1 + # print(result) + + switch_prediction = result.index(np.max(result)) + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, M/A format macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), accuracy_score(switch_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of NN is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) print('\n', result) \ No newline at end of file diff --git a/src/models/NN/readme.md b/src/models/NN/readme.md index 8ac4737..a6bb8de 100644 --- a/src/models/NN/readme.md +++ b/src/models/NN/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported NN. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported NN. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/PCA/Type_1/dedicated_p4.py b/src/models/PCA/Type_1/dedicated_p4.py index 2c895a1..4eef524 100755 --- a/src/models/PCA/Type_1/dedicated_p4.py +++ b/src/models/PCA/Type_1/dedicated_p4.py @@ -1,237 +1,237 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['num_axis'] = Planter_config['p4 config']["num components"] - config['model_type'] = Planter_config['test config']['type of test'] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for ax in range(0, config['num_axis']): - # headers.write(" bit<" + str(config['num_bits']) + "> middle_ax" + str(ax) + ";\n") - headers.write(" bit<32> middle_ax" + str(ax) + ";\n") - - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for ax in range(0, config['num_axis']): - if ax==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) - ingress.write("){\n") - - if f==0: - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+" = (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") - else: - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action wrap_back( ){\n") - for ax in range(0, config['num_axis']): - ingress.write(" meta.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") - ingress.write(" }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - # ingress.write(" bias.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" wrap_back();\n") - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature" + str(f) - Entry["match"] = {} - Entry["match"]["meta.feature" + str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) - Entry["action_params"] = {} - for ax in range(num_components): - Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - - - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_components = config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) - for ax in range(num_components): - label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] - file.write( str(label) + " ") - file.write("\n") - file.write("\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for ax in range(0, config['num_axis']): - if ax==0: - tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['num_axis'] = Planter_config['p4 config']["num components"] + config['model_type'] = Planter_config['test config']['type of test'] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for ax in range(0, config['num_axis']): + # headers.write(" bit<" + str(config['num_bits']) + "> middle_ax" + str(ax) + ";\n") + headers.write(" bit<32> middle_ax" + str(ax) + ";\n") + + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for ax in range(0, config['num_axis']): + if ax==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) + ingress.write("){\n") + + if f==0: + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+" = (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") + else: + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action wrap_back( ){\n") + for ax in range(0, config['num_axis']): + ingress.write(" meta.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") + ingress.write(" }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + # ingress.write(" bias.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" wrap_back();\n") + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature" + str(f) + Entry["match"] = {} + Entry["match"]["meta.feature" + str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) + Entry["action_params"] = {} + for ax in range(num_components): + Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + + + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_components = config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) + for ax in range(num_components): + label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] + file.write( str(label) + " ") + file.write("\n") + file.write("\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for ax in range(0, config['num_axis']): + if ax==0: + tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + diff --git a/src/models/PCA/Type_1/readme.md b/src/models/PCA/Type_1/readme.md index a85ed65..1c86b24 100644 --- a/src/models/PCA/Type_1/readme.md +++ b/src/models/PCA/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for PCA. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for PCA. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/PCA/Type_1/table_generator.py b/src/models/PCA/Type_1/table_generator.py index 033dd4c..4105345 100755 --- a/src/models/PCA/Type_1/table_generator.py +++ b/src/models/PCA/Type_1/table_generator.py @@ -1,194 +1,194 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -from sklearn.decomposition import PCA -from mpl_toolkits.mplot3d import Axes3D -import json -import matplotlib.pyplot as plt -import copy -from src.functions.json_encoder import * -from src.functions.normalization import * -from sklearn.metrics.pairwise import cosine_similarity -from scipy.stats import pearsonr -import math -import time - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - Planter_config['model config']['num components'] = np.int(input('- Number components? (default = 2) ') or '2') - Planter_config['model config']['number of bits'] = np.int( input('- Number of bits for each action data? (default = 16) ') or '16') - - num_bits = Planter_config['model config']['number of bits'] - - num_components = Planter_config['model config']['num components'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - pca = PCA(n_components=num_components) - pca.fit(train_X) - sklearn_X_new = pca.transform(test_X) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_info = {} - model_info['means'] = pca.mean_ - model_info['components'] = pca.components_.T - - value_info = {} - value_info["max"] = 0 - value_info["min"] = 0 - for ax in range(num_components): - value_info["ax "+str(ax)] = {} - value_info["ax " + str(ax)]["max"] = 0 - value_info["ax " + str(ax)]["min"] = 0 - - PCA_Table = {} - for f in range(num_features): - PCA_Table['feature '+str(f)] = {} - for input_value in range(feature_max[f]): - PCA_Table['feature ' + str(f)][input_value] = {} - value = input_value - model_info['means'][f] - for ax in range(num_components): - middle_value = copy.deepcopy(value*model_info['components'][f,ax]) - PCA_Table['feature ' + str(f)][input_value]['ax'+str(ax)] = middle_value - if middle_value > value_info["ax " + str(ax)]["max"]: - value_info["ax " + str(ax)]["max"] = middle_value - if middle_value < value_info["ax " + str(ax)]["min"]: - value_info["ax " + str(ax)]["min"] = middle_value - if middle_value > value_info["max"]: - value_info["max"] = middle_value - if middle_value < value_info["min"]: - value_info["min"] = middle_value - - scale = (2**num_bits)/((value_info["max"]-value_info["min"])*(num_features)) - - - Exact_Table = {} - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for input_value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][input_value] = {} - for ax in range(num_components): - middle_value = copy.deepcopy(PCA_Table['feature ' + str(f)][input_value]['ax' + str(ax)]) - middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) - Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "PCA" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']["num components"] = num_components - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'dimension_reduction' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - X_new = copy.deepcopy(sklearn_X_new) - - for ax in range(num_components): - X_new[:, ax] = sklearn_X_new[:, ax] - num_features*(value_info["min"]) - for ax in range(num_components): - corr, _ = pearsonr(X_new[:, ax],sklearn_X_new[:, ax]) - print('Pearsons correlation for axis '+str(ax)+' is: %.3f' % corr) - - return X_new - - - -def test_tables(sklearn_test_x, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_x = copy.deepcopy(sklearn_test_x) - - - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - for ax in range(num_components): - switch_test_x[i][ax] = 0 - for f in range(num_features): - ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] - for ax in range(num_components): - switch_test_x[i][ax] += ax_middle["ax"+str(ax)] - - for ax in range(num_components): - - corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) - print('Pearsons correlation of M/A PCA and output of Sklearn for axis '+str(ax)+' is: %.4f' % corr) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +from sklearn.decomposition import PCA +from mpl_toolkits.mplot3d import Axes3D +import json +import matplotlib.pyplot as plt +import copy +from src.functions.json_encoder import * +from src.functions.normalization import * +from sklearn.metrics.pairwise import cosine_similarity +from scipy.stats import pearsonr +import math +import time + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + Planter_config['model config']['num components'] = np.int(input('- Number components? (default = 2) ') or '2') + Planter_config['model config']['number of bits'] = np.int( input('- Number of bits for each action data? (default = 16) ') or '16') + + num_bits = Planter_config['model config']['number of bits'] + + num_components = Planter_config['model config']['num components'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + pca = PCA(n_components=num_components) + pca.fit(train_X) + sklearn_X_new = pca.transform(test_X) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_info = {} + model_info['means'] = pca.mean_ + model_info['components'] = pca.components_.T + + value_info = {} + value_info["max"] = 0 + value_info["min"] = 0 + for ax in range(num_components): + value_info["ax "+str(ax)] = {} + value_info["ax " + str(ax)]["max"] = 0 + value_info["ax " + str(ax)]["min"] = 0 + + PCA_Table = {} + for f in range(num_features): + PCA_Table['feature '+str(f)] = {} + for input_value in range(feature_max[f]): + PCA_Table['feature ' + str(f)][input_value] = {} + value = input_value - model_info['means'][f] + for ax in range(num_components): + middle_value = copy.deepcopy(value*model_info['components'][f,ax]) + PCA_Table['feature ' + str(f)][input_value]['ax'+str(ax)] = middle_value + if middle_value > value_info["ax " + str(ax)]["max"]: + value_info["ax " + str(ax)]["max"] = middle_value + if middle_value < value_info["ax " + str(ax)]["min"]: + value_info["ax " + str(ax)]["min"] = middle_value + if middle_value > value_info["max"]: + value_info["max"] = middle_value + if middle_value < value_info["min"]: + value_info["min"] = middle_value + + scale = (2**num_bits)/((value_info["max"]-value_info["min"])*(num_features)) + + + Exact_Table = {} + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for input_value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][input_value] = {} + for ax in range(num_components): + middle_value = copy.deepcopy(PCA_Table['feature ' + str(f)][input_value]['ax' + str(ax)]) + middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) + Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "PCA" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']["num components"] = num_components + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'dimension_reduction' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + X_new = copy.deepcopy(sklearn_X_new) + + for ax in range(num_components): + X_new[:, ax] = sklearn_X_new[:, ax] - num_features*(value_info["min"]) + for ax in range(num_components): + corr, _ = pearsonr(X_new[:, ax],sklearn_X_new[:, ax]) + print('Pearsons correlation for axis '+str(ax)+' is: %.3f' % corr) + + return X_new + + + +def test_tables(sklearn_test_x, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_x = copy.deepcopy(sklearn_test_x) + + + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + for ax in range(num_components): + switch_test_x[i][ax] = 0 + for f in range(num_features): + ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] + for ax in range(num_components): + switch_test_x[i][ax] += ax_middle["ax"+str(ax)] + + for ax in range(num_components): + + corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) + print('Pearsons correlation of M/A PCA and output of Sklearn for axis '+str(ax)+' is: %.4f' % corr) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/PCA/Type_LB/dedicated_p4.py b/src/models/PCA/Type_LB/dedicated_p4.py index 21bca98..9422d56 100755 --- a/src/models/PCA/Type_LB/dedicated_p4.py +++ b/src/models/PCA/Type_LB/dedicated_p4.py @@ -1,241 +1,241 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['num_axis'] = Planter_config['p4 config']["num components"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - - for ax in range(0, config['num_axis']): - # headers.write(" bit<" + str(config['num_bits']) + "> middle_ax" + str(ax) + ";\n") - headers.write(" bit<32> middle_ax" + str(ax) + ";\n") - - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for ax in range(0, config['num_axis']): - if ax==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) - ingress.write("){\n") - - if f==0: - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+" = (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") - else: - for ax in range(0, config['num_axis']): - ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action wrap_back( ){\n") - for ax in range(0, config['num_axis']): - ingress.write(" hdr.Planter.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") - ingress.write(" }\n\n") - - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - - # ingress.write(" bias.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - ingress.write(" wrap_back();\n") - # ingress.write(" /* Swap the MAC addresses */\n" - # " bit<48> tmp;\n" - # " tmp = hdr.ethernet.dstAddr;\n" - # " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" - # " hdr.ethernet.srcAddr = tmp;\n" - # # " send(3);\n") - # " send(ig_intr_md.ingress_port);\n") - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature" + str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature" + str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) - Entry["action_params"] = {} - for ax in range(num_components): - Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_components = config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) - for ax in range(num_components): - label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] - file.write( str(label) + " ") - file.write("\n") - file.write("\n") - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['feature " + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for ax in range(0, config['num_axis']): - if ax==0: - tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") - else: - tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['num_axis'] = Planter_config['p4 config']["num components"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + + for ax in range(0, config['num_axis']): + # headers.write(" bit<" + str(config['num_bits']) + "> middle_ax" + str(ax) + ";\n") + headers.write(" bit<32> middle_ax" + str(ax) + ";\n") + + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for ax in range(0, config['num_axis']): + if ax==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "ax" + str(ax)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"ax"+str(ax)) + ingress.write("){\n") + + if f==0: + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+" = (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") + else: + for ax in range(0, config['num_axis']): + ingress.write(" meta.middle_ax" + str(ax)+" = meta.middle_ax" + str(ax)+" + (bit<32>)f"+str(f)+"ax"+str(ax) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action wrap_back( ){\n") + for ax in range(0, config['num_axis']): + ingress.write(" hdr.Planter.feature"+str(ax)+" = meta.middle_ax" + str(ax) + ";\n") + ingress.write(" }\n\n") + + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + + # ingress.write(" bias.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + ingress.write(" wrap_back();\n") + # ingress.write(" /* Swap the MAC addresses */\n" + # " bit<48> tmp;\n" + # " tmp = hdr.ethernet.dstAddr;\n" + # " hdr.ethernet.dstAddr = hdr.ethernet.srcAddr;\n" + # " hdr.ethernet.srcAddr = tmp;\n" + # # " send(3);\n") + # " send(ig_intr_md.ingress_port);\n") + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature" + str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature" + str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature" + str(f) + Entry["action_params"] = {} + for ax in range(num_components): + Entry["action_params"]["f" + str(f) + "ax" + str(ax)] = Exact_Table['feature ' + str(f)][idx]["ax" + str(ax)] + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_components = config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + " " + str(key) + " => " ) + for ax in range(num_components): + label = Exact_Table['feature ' + str(f)][idx][ "ax" + str(ax)] + file.write( str(label) + " ") + file.write("\n") + file.write("\n") + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['feature " + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['feature " + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for ax in range(0, config['num_axis']): + if ax==0: + tload.write("table['feature " + str(f) + "'][key]['ax" + str(ax) + "']") + else: + tload.write(", table['feature "+str(f)+"'][key]['ax"+str(ax)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + diff --git a/src/models/PCA/Type_LB/readme.md b/src/models/PCA/Type_LB/readme.md index a85ed65..1c86b24 100644 --- a/src/models/PCA/Type_LB/readme.md +++ b/src/models/PCA/Type_LB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for PCA. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for PCA. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/PCA/Type_LB/table_generator.py b/src/models/PCA/Type_LB/table_generator.py index 033dd4c..4105345 100755 --- a/src/models/PCA/Type_LB/table_generator.py +++ b/src/models/PCA/Type_LB/table_generator.py @@ -1,194 +1,194 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -from sklearn.decomposition import PCA -from mpl_toolkits.mplot3d import Axes3D -import json -import matplotlib.pyplot as plt -import copy -from src.functions.json_encoder import * -from src.functions.normalization import * -from sklearn.metrics.pairwise import cosine_similarity -from scipy.stats import pearsonr -import math -import time - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - Planter_config['model config']['num components'] = np.int(input('- Number components? (default = 2) ') or '2') - Planter_config['model config']['number of bits'] = np.int( input('- Number of bits for each action data? (default = 16) ') or '16') - - num_bits = Planter_config['model config']['number of bits'] - - num_components = Planter_config['model config']['num components'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - pca = PCA(n_components=num_components) - pca.fit(train_X) - sklearn_X_new = pca.transform(test_X) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - - model_info = {} - model_info['means'] = pca.mean_ - model_info['components'] = pca.components_.T - - value_info = {} - value_info["max"] = 0 - value_info["min"] = 0 - for ax in range(num_components): - value_info["ax "+str(ax)] = {} - value_info["ax " + str(ax)]["max"] = 0 - value_info["ax " + str(ax)]["min"] = 0 - - PCA_Table = {} - for f in range(num_features): - PCA_Table['feature '+str(f)] = {} - for input_value in range(feature_max[f]): - PCA_Table['feature ' + str(f)][input_value] = {} - value = input_value - model_info['means'][f] - for ax in range(num_components): - middle_value = copy.deepcopy(value*model_info['components'][f,ax]) - PCA_Table['feature ' + str(f)][input_value]['ax'+str(ax)] = middle_value - if middle_value > value_info["ax " + str(ax)]["max"]: - value_info["ax " + str(ax)]["max"] = middle_value - if middle_value < value_info["ax " + str(ax)]["min"]: - value_info["ax " + str(ax)]["min"] = middle_value - if middle_value > value_info["max"]: - value_info["max"] = middle_value - if middle_value < value_info["min"]: - value_info["min"] = middle_value - - scale = (2**num_bits)/((value_info["max"]-value_info["min"])*(num_features)) - - - Exact_Table = {} - for f in range(num_features): - Exact_Table['feature ' + str(f)] = {} - for input_value in range(feature_max[f]): - Exact_Table['feature ' + str(f)][input_value] = {} - for ax in range(num_components): - middle_value = copy.deepcopy(PCA_Table['feature ' + str(f)][input_value]['ax' + str(ax)]) - middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) - Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "PCA" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']["num components"] = num_components - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'dimension_reduction' - - json.dump(Planter_config, - open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, - cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - X_new = copy.deepcopy(sklearn_X_new) - - for ax in range(num_components): - X_new[:, ax] = sklearn_X_new[:, ax] - num_features*(value_info["min"]) - for ax in range(num_components): - corr, _ = pearsonr(X_new[:, ax],sklearn_X_new[:, ax]) - print('Pearsons correlation for axis '+str(ax)+' is: %.3f' % corr) - - return X_new - - - -def test_tables(sklearn_test_x, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_components = Planter_config['model config']['num components'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_x = copy.deepcopy(sklearn_test_x) - - - - for i in range(np.shape(test_X.values)[0]): - input_feature_value = test_X.values[i] - for ax in range(num_components): - switch_test_x[i][ax] = 0 - for f in range(num_features): - ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] - for ax in range(num_components): - switch_test_x[i][ax] += ax_middle["ax"+str(ax)] - - for ax in range(num_components): - - corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) - print('Pearsons correlation of M/A PCA and output of Sklearn for axis '+str(ax)+' is: %.4f' % corr) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +from sklearn.decomposition import PCA +from mpl_toolkits.mplot3d import Axes3D +import json +import matplotlib.pyplot as plt +import copy +from src.functions.json_encoder import * +from src.functions.normalization import * +from sklearn.metrics.pairwise import cosine_similarity +from scipy.stats import pearsonr +import math +import time + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + Planter_config['model config']['num components'] = np.int(input('- Number components? (default = 2) ') or '2') + Planter_config['model config']['number of bits'] = np.int( input('- Number of bits for each action data? (default = 16) ') or '16') + + num_bits = Planter_config['model config']['number of bits'] + + num_components = Planter_config['model config']['num components'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + pca = PCA(n_components=num_components) + pca.fit(train_X) + sklearn_X_new = pca.transform(test_X) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + + model_info = {} + model_info['means'] = pca.mean_ + model_info['components'] = pca.components_.T + + value_info = {} + value_info["max"] = 0 + value_info["min"] = 0 + for ax in range(num_components): + value_info["ax "+str(ax)] = {} + value_info["ax " + str(ax)]["max"] = 0 + value_info["ax " + str(ax)]["min"] = 0 + + PCA_Table = {} + for f in range(num_features): + PCA_Table['feature '+str(f)] = {} + for input_value in range(feature_max[f]): + PCA_Table['feature ' + str(f)][input_value] = {} + value = input_value - model_info['means'][f] + for ax in range(num_components): + middle_value = copy.deepcopy(value*model_info['components'][f,ax]) + PCA_Table['feature ' + str(f)][input_value]['ax'+str(ax)] = middle_value + if middle_value > value_info["ax " + str(ax)]["max"]: + value_info["ax " + str(ax)]["max"] = middle_value + if middle_value < value_info["ax " + str(ax)]["min"]: + value_info["ax " + str(ax)]["min"] = middle_value + if middle_value > value_info["max"]: + value_info["max"] = middle_value + if middle_value < value_info["min"]: + value_info["min"] = middle_value + + scale = (2**num_bits)/((value_info["max"]-value_info["min"])*(num_features)) + + + Exact_Table = {} + for f in range(num_features): + Exact_Table['feature ' + str(f)] = {} + for input_value in range(feature_max[f]): + Exact_Table['feature ' + str(f)][input_value] = {} + for ax in range(num_components): + middle_value = copy.deepcopy(PCA_Table['feature ' + str(f)][input_value]['ax' + str(ax)]) + middle_value = np.int(np.floor((middle_value - value_info["min"])*scale)) + Exact_Table['feature ' + str(f)][input_value]['ax' + str(ax)] = middle_value + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['feature ' + str(f)].keys())] + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "PCA" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']["num components"] = num_components + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'dimension_reduction' + + json.dump(Planter_config, + open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, + cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + X_new = copy.deepcopy(sklearn_X_new) + + for ax in range(num_components): + X_new[:, ax] = sklearn_X_new[:, ax] - num_features*(value_info["min"]) + for ax in range(num_components): + corr, _ = pearsonr(X_new[:, ax],sklearn_X_new[:, ax]) + print('Pearsons correlation for axis '+str(ax)+' is: %.3f' % corr) + + return X_new + + + +def test_tables(sklearn_test_x, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_components = Planter_config['model config']['num components'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_x = copy.deepcopy(sklearn_test_x) + + + + for i in range(np.shape(test_X.values)[0]): + input_feature_value = test_X.values[i] + for ax in range(num_components): + switch_test_x[i][ax] = 0 + for f in range(num_features): + ax_middle = Exact_Table["feature "+str(f)][str(input_feature_value[f])] + for ax in range(num_components): + switch_test_x[i][ax] += ax_middle["ax"+str(ax)] + + for ax in range(num_components): + + corr, _ = pearsonr(sklearn_test_x[:, ax],switch_test_x[:, ax]) + print('Pearsons correlation of M/A PCA and output of Sklearn for axis '+str(ax)+' is: %.4f' % corr) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/PCA/readme.md b/src/models/PCA/readme.md index 05ed269..fa79682 100644 --- a/src/models/PCA/readme.md +++ b/src/models/PCA/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported PCA. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported PCA. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_1/dedicated_p4.py b/src/models/RF/Type_1/dedicated_p4.py index 39897a7..a805632 100755 --- a/src/models/RF/Type_1/dedicated_p4.py +++ b/src/models/RF/Type_1/dedicated_p4.py @@ -1,351 +1,351 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" \ - " meta.result = label;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":exact; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n") - ingress.write(" meta.tree_" + str(i) + "_prob" + " = prob;\n" \ - " meta.tree_" + str(i) + "_vote" + " = vote;\n" ) - ingress.write(" }\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" \ - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={read_prob" + str(i) + ";}\n") - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" \ - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={read_lable;}\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" \ - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num, count): - num = bin(num).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature '+str(f)][idx][t] - c_len = Planter_config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - Entry["action_params"]["tree"] = int(codes, 2) - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_leaf_id"+str(t) - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][t][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Exact_Table['tree '+str(t)][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_prob"+str(t) - Entry["action_params"] = {} - Entry["action_params"]["prob"] = 0 - Entry["action_params"]["vote"] = int(Exact_Table['tree '+str(t)][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][t][f] - - for idx in Exact_Table['decision']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for t in range(num_trees): - Entry["match"]["meta.tree_" + str(t)+"_vote"] = int(Exact_Table['decision'][idx]['t' + str(t) + ' vote']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Exact_Table['decision'][idx]['class']) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature ' + str(f)][idx][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Exact_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str( - i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" \ + " meta.result = label;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":exact; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n") + ingress.write(" meta.tree_" + str(i) + "_prob" + " = prob;\n" \ + " meta.tree_" + str(i) + "_vote" + " = vote;\n" ) + ingress.write(" }\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" \ + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={read_prob" + str(i) + ";}\n") + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" \ + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={read_lable;}\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" \ + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num, count): + num = bin(num).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature '+str(f)][idx][t] + c_len = Planter_config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + Entry["action_params"]["tree"] = int(codes, 2) + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_leaf_id"+str(t) + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][t][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Exact_Table['tree '+str(t)][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_prob"+str(t) + Entry["action_params"] = {} + Entry["action_params"]["prob"] = 0 + Entry["action_params"]["vote"] = int(Exact_Table['tree '+str(t)][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][t][f] + + for idx in Exact_Table['decision']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for t in range(num_trees): + Entry["match"]["meta.tree_" + str(t)+"_vote"] = int(Exact_Table['decision'][idx]['t' + str(t) + ' vote']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Exact_Table['decision'][idx]['class']) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature ' + str(f)][idx][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Exact_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str( + i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + diff --git a/src/models/RF/Type_1/readme.md b/src/models/RF/Type_1/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_1/readme.md +++ b/src/models/RF/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_1/table_generator.py b/src/models/RF/Type_1/table_generator.py index 4ba0cee..bbae0ae 100755 --- a/src/models/RF/Type_1/table_generator.py +++ b/src/models/RF/Type_1/table_generator.py @@ -1,534 +1,534 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # Random Forest - - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - table_name = 'Ternary_Table.json' - json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Exact_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - print('tree(code/code to vote) table not matched') - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - print('decision(vote to class) table not matched', vote_list) - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 10 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["used columns"])+np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ',0 ) - - - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') - # train_X, train_y, test_X, test_y, used_features = load_data(Planter_config['model config']['number of features']) - # - # run_model(train_X, train_y, test_X, test_y, used_features) +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # Random Forest + + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + table_name = 'Ternary_Table.json' + json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Exact_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + print('tree(code/code to vote) table not matched') + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + print('decision(vote to class) table not matched', vote_list) + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 10 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["used columns"])+np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ',0 ) + + + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') + # train_X, train_y, test_X, test_y, used_features = load_data(Planter_config['model config']['number of features']) + # + # run_model(train_X, train_y, test_X, test_y, used_features) diff --git a/src/models/RF/Type_1_xsa/dedicated_p4.py b/src/models/RF/Type_1_xsa/dedicated_p4.py index 7bcf243..1e4a2ff 100755 --- a/src/models/RF/Type_1_xsa/dedicated_p4.py +++ b/src/models/RF/Type_1_xsa/dedicated_p4.py @@ -1,440 +1,440 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config, xsa_architecture=False): - if xsa_architecture: - return - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_variables(fname, config): - with open(fname, 'a') as processing: - for i in range(0, config['num_features']): - processing.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - processing.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - processing.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - processing.write(" bit<7> tree_" + str(t) + "_prob;\n") - processing.write(" bit<32> DstAddr;\n") - -def separate_logics(fname, config): - # this should only be called for xsa architecture - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config, xsa_architecture=False): - if xsa_architecture: - min_code_width=10 - else: - min_code_width=0 - - with open(fname, 'a') as ingress: - # writing features: all features should be 32 bits, so this should be okay without minimum code width - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n") - if xsa_architecture: - ingress.write(" hdr.Planter.result = label;\n") - else: - ingress.write(" meta.result = label;\n") - ingress.write(" }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n") - if xsa_architecture: - ingress.write(" key = { hdr.Planter.feature" + str(i) + ":exact;}\n") - else: - ingress.write(" key = { meta.feature" + str(i) + ":exact; }\n") - ingress.write( " actions = {\n") - if xsa_architecture: - ingress.write(" extract_feature" + str(i) + "(code_f" + str(i) + ");\n") - else: - ingress.write(" extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n") - ingress.write(" NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n") - if xsa_architecture: - ingress.write(" tree_" + str(i) + "_prob" + " = prob;\n" \ - " tree_" + str(i) + "_vote" + " = vote;\n" ) - else: - ingress.write(" meta.tree_" + str(i) + "_prob" + " = prob;\n" \ - " meta.tree_" + str(i) + "_vote" + " = vote;\n" ) - ingress.write(" }\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" \ - " key = { ") - for j in range(0, config['num_features']): - if xsa_architecture: - key_len = 1 + int(count_code[j] + config['code_width'][i][j] - 1) - int(count_code[j]) - ingress.write("(bit<" + str(max(key_len, min_code_width)) + ">) code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact @name(\"lookup" + "_leaf_id" + str(i) + "\");\n ") - else: - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={read_prob" + str(i) + ";}\n") - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" \ - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - if xsa_architecture: - ingress.write("tree_" + str(t) + "_vote:exact;\n ") - else: - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={read_lable;}\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" \ - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num, count): - num = bin(num).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature '+str(f)][idx][t] - c_len = Planter_config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - Entry["action_params"]["tree"] = int(codes, 2) - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_leaf_id"+str(t) - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][t][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Exact_Table['tree '+str(t)][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_prob"+str(t) - Entry["action_params"] = {} - Entry["action_params"]["prob"] = 0 - Entry["action_params"]["vote"] = int(Exact_Table['tree '+str(t)][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][t][f] - - for idx in Exact_Table['decision']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for t in range(num_trees): - Entry["match"]["meta.tree_" + str(t)+"_vote"] = int(Exact_Table['decision'][idx]['t' + str(t) + ' vote']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = np.int(Exact_Table['decision'][idx]['class']) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature ' + str(f)][idx][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Exact_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") - - -def create_tables_Commend_esnet(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature ' + str(f)][idx][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - file.write("table_add lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Exact_Table['decision']: - file.write("table_add decision read_lable ") - for t in range(num_trees): - file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") - -def edit_tables_command_esnet_software(fname): - with open(fname, 'a') as file: - file.write( - "# run traffin\n" - "run_traffic packets\n\n" - "# end\n" - "exit\n") - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file_esnet_hardware = work_root + "/src/targets/alveo_u280/hardware/s1-commands.txt" - create_tables_Commend_esnet(commend_file_esnet_hardware, Planter_config) - - commend_file_esnet_software = work_root + "/src/targets/alveo_u280/behavioral/test_environment/sim/test-case0/runsim.txt" - create_tables_Commend_esnet(commend_file_esnet_software, Planter_config) - edit_tables_command_esnet_software(commend_file_esnet_software) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str( - i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config, xsa_architecture=False): + if xsa_architecture: + return + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_variables(fname, config): + with open(fname, 'a') as processing: + for i in range(0, config['num_features']): + processing.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + processing.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + processing.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + processing.write(" bit<7> tree_" + str(t) + "_prob;\n") + processing.write(" bit<32> DstAddr;\n") + +def separate_logics(fname, config): + # this should only be called for xsa architecture + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config, xsa_architecture=False): + if xsa_architecture: + min_code_width=10 + else: + min_code_width=0 + + with open(fname, 'a') as ingress: + # writing features: all features should be 32 bits, so this should be okay without minimum code width + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n") + if xsa_architecture: + ingress.write(" hdr.Planter.result = label;\n") + else: + ingress.write(" meta.result = label;\n") + ingress.write(" }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n") + if xsa_architecture: + ingress.write(" key = { hdr.Planter.feature" + str(i) + ":exact;}\n") + else: + ingress.write(" key = { meta.feature" + str(i) + ":exact; }\n") + ingress.write( " actions = {\n") + if xsa_architecture: + ingress.write(" extract_feature" + str(i) + "(code_f" + str(i) + ");\n") + else: + ingress.write(" extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n") + ingress.write(" NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n") + if xsa_architecture: + ingress.write(" tree_" + str(i) + "_prob" + " = prob;\n" \ + " tree_" + str(i) + "_vote" + " = vote;\n" ) + else: + ingress.write(" meta.tree_" + str(i) + "_prob" + " = prob;\n" \ + " meta.tree_" + str(i) + "_vote" + " = vote;\n" ) + ingress.write(" }\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" \ + " key = { ") + for j in range(0, config['num_features']): + if xsa_architecture: + key_len = 1 + int(count_code[j] + config['code_width'][i][j] - 1) - int(count_code[j]) + ingress.write("(bit<" + str(max(key_len, min_code_width)) + ">) code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact @name(\"lookup" + "_leaf_id" + str(i) + "\");\n ") + else: + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={read_prob" + str(i) + ";}\n") + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" \ + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + if xsa_architecture: + ingress.write("tree_" + str(t) + "_vote:exact;\n ") + else: + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={read_lable;}\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" \ + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num, count): + num = bin(num).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature '+str(f)][idx][t] + c_len = Planter_config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + Entry["action_params"]["tree"] = int(codes, 2) + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_leaf_id"+str(t) + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][t][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Exact_Table['tree '+str(t)][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_prob"+str(t) + Entry["action_params"] = {} + Entry["action_params"]["prob"] = 0 + Entry["action_params"]["vote"] = int(Exact_Table['tree '+str(t)][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][t][f] + + for idx in Exact_Table['decision']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for t in range(num_trees): + Entry["match"]["meta.tree_" + str(t)+"_vote"] = int(Exact_Table['decision'][idx]['t' + str(t) + ' vote']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = np.int(Exact_Table['decision'][idx]['class']) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature ' + str(f)][idx][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Exact_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") + + +def create_tables_Commend_esnet(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature ' + str(f)][idx][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + file.write("table_add lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Exact_Table['decision']: + file.write("table_add decision read_lable ") + for t in range(num_trees): + file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") + +def edit_tables_command_esnet_software(fname): + with open(fname, 'a') as file: + file.write( + "# run traffin\n" + "run_traffic packets\n\n" + "# end\n" + "exit\n") + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file_esnet_hardware = work_root + "/src/targets/alveo_u280/hardware/s1-commands.txt" + create_tables_Commend_esnet(commend_file_esnet_hardware, Planter_config) + + commend_file_esnet_software = work_root + "/src/targets/alveo_u280/behavioral/test_environment/sim/test-case0/runsim.txt" + create_tables_Commend_esnet(commend_file_esnet_software, Planter_config) + edit_tables_command_esnet_software(commend_file_esnet_software) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str( + i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + diff --git a/src/models/RF/Type_1_xsa/readme.md b/src/models/RF/Type_1_xsa/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_1_xsa/readme.md +++ b/src/models/RF/Type_1_xsa/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_1_xsa/table_generator.py b/src/models/RF/Type_1_xsa/table_generator.py index a445732..b0b66c9 100755 --- a/src/models/RF/Type_1_xsa/table_generator.py +++ b/src/models/RF/Type_1_xsa/table_generator.py @@ -1,528 +1,528 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # Random Forest - - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - table_name = 'Ternary_Table.json' - json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Exact_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - # Planter_config['p4 config']["width of leaf-id"] = 8 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - print('tree(code/code to vote) table not matched') - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - print('decision(vote to class) table not matched', vote_list) - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 10 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["used columns"])+np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ',0 ) - - - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # Random Forest + + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + table_name = 'Ternary_Table.json' + json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Exact_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + # Planter_config['p4 config']["width of leaf-id"] = 8 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + print('tree(code/code to vote) table not matched') + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + print('decision(vote to class) table not matched', vote_list) + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 10 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["used columns"])+np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ',0 ) + + + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') + diff --git a/src/models/RF/Type_2/dedicated_p4.py b/src/models/RF/Type_2/dedicated_p4.py index 790015f..4d11354 100755 --- a/src/models/RF/Type_2/dedicated_p4.py +++ b/src/models/RF/Type_2/dedicated_p4.py @@ -1,368 +1,368 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":exact; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num, count): - num = bin(num).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature '+str(f)][idx][t] - c_len = Planter_config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - Entry["action_params"]["tree"] = int(codes, 2) - Table_entries += [Entry] - - count_code = {} - for f in range(num_features): - count_code[f] = 0 - - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_leaf_id"+str(t) - Entry["match"] = {} - for f in range(num_features): - key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][t][f] - 1)) + ":" + str(int(count_code[f])) + "]" - Entry["match"][key] = int(Exact_Table['tree '+str(t)][idx]['f'+str(f)+' code']) - Entry["action_name"] = "SwitchIngress.read_prob"+str(t) - Entry["action_params"] = {} - Entry["action_params"]["prob"] = 0 - Entry["action_params"]["vote"] = int(Exact_Table['tree '+str(t)][idx]['leaf']) - Table_entries += [Entry] - for f in range(num_features): - count_code[f] += Planter_config['p4 config']['width of code'][t][f] - - for idx in Exact_Table['decision']: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for t in range(num_trees): - Entry["match"]["meta.tree_" + str(t)+"_vote"] = int(Exact_Table['decision'][idx]['t' + str(t) + ' vote']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = int(Exact_Table['decision'][idx]['class']) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - for f in range(num_features): - for idx in Exact_Table['feature ' + str(f)]: - key = int(idx) - codes = '' - for t in range(num_trees): - c_tree = Exact_Table['feature ' + str(f)][idx][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => "+str(label)+"\n") - - file.write("\n") - - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Exact_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str( - i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_feature" + str(i) + \ - ".add_with_extract_feature" + str(i) + \ - "(int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":exact; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num, count): + num = bin(num).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature '+str(f)][idx][t] + c_len = Planter_config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + Entry["action_params"]["tree"] = int(codes, 2) + Table_entries += [Entry] + + count_code = {} + for f in range(num_features): + count_code[f] = 0 + + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_leaf_id"+str(t) + Entry["match"] = {} + for f in range(num_features): + key = "meta.code_f"+str(f)+"[" + str(int(count_code[f] + Planter_config['p4 config']['width of code'][t][f] - 1)) + ":" + str(int(count_code[f])) + "]" + Entry["match"][key] = int(Exact_Table['tree '+str(t)][idx]['f'+str(f)+' code']) + Entry["action_name"] = "SwitchIngress.read_prob"+str(t) + Entry["action_params"] = {} + Entry["action_params"]["prob"] = 0 + Entry["action_params"]["vote"] = int(Exact_Table['tree '+str(t)][idx]['leaf']) + Table_entries += [Entry] + for f in range(num_features): + count_code[f] += Planter_config['p4 config']['width of code'][t][f] + + for idx in Exact_Table['decision']: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for t in range(num_trees): + Entry["match"]["meta.tree_" + str(t)+"_vote"] = int(Exact_Table['decision'][idx]['t' + str(t) + ' vote']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = int(Exact_Table['decision'][idx]['class']) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + for f in range(num_features): + for idx in Exact_Table['feature ' + str(f)]: + key = int(idx) + codes = '' + for t in range(num_trees): + c_tree = Exact_Table['feature ' + str(f)][idx][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => "+str(label)+"\n") + + file.write("\n") + + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Exact_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Exact_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Exact_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Exact_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str( + i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_feature" + str(i) + \ + ".add_with_extract_feature" + str(i) + \ + "(int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/RF/Type_2/readme.md b/src/models/RF/Type_2/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_2/readme.md +++ b/src/models/RF/Type_2/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_2/table_generator.py b/src/models/RF/Type_2/table_generator.py index 0467ac0..3ac052c 100755 --- a/src/models/RF/Type_2/table_generator.py +++ b/src/models/RF/Type_2/table_generator.py @@ -1,554 +1,554 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # Random Forest - - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - # num_features = len(train_X.keys()) - # num_classes = np.max(train_y)+1 - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree '+str(t)] = {} - for idx in Exact_Table['tree '+str(t)]: - if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] - code_table_size += 1 - Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [ Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - # ================================================================== - - table_name = 'Ternary_Table.json' - json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Exact_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - - - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # Random Forest + + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + # num_features = len(train_X.keys()) + # num_classes = np.max(train_y)+1 + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree '+str(t)] = {} + for idx in Exact_Table['tree '+str(t)]: + if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] + code_table_size += 1 + Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [ Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + # ================================================================== + + table_name = 'Ternary_Table.json' + json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Exact_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Exact_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.3}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + + + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') \ No newline at end of file diff --git a/src/models/RF/Type_3/dedicated_p4.py b/src/models/RF/Type_3/dedicated_p4.py index 6a5728a..98fec0f 100755 --- a/src/models/RF/Type_3/dedicated_p4.py +++ b/src/models/RF/Type_3/dedicated_p4.py @@ -1,323 +1,323 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n" - " table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n" - " table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - # Load feature tables - # for i in range(0, config['num_features']): - # tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( - # i) + "'].keys()),'entries')\n" \ - # "for key in table['feature " + str(i) + "']:\n") - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - # tload.write(" codes = ten_to_bin(int(table['feature " + str( - # i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - # - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - # tload.write(" " + (config['debug_load_table'] * "# ") + \ - # "Ingress.lookup_feature" + str(i) + \ - # ".add_with_extract_feature" + str(i) + \ - # "(int(key), int(codes,2))\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n" + " table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n" + " table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + # Load feature tables + # for i in range(0, config['num_features']): + # tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( + # i) + "'].keys()),'entries')\n" \ + # "for key in table['feature " + str(i) + "']:\n") + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + # tload.write(" codes = ten_to_bin(int(table['feature " + str( + # i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + # + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + # tload.write(" " + (config['debug_load_table'] * "# ") + \ + # "Ingress.lookup_feature" + str(i) + \ + # ".add_with_extract_feature" + str(i) + \ + # "(int(key), int(codes,2))\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/RF/Type_3/readme.md b/src/models/RF/Type_3/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_3/readme.md +++ b/src/models/RF/Type_3/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_3/table_generator.py b/src/models/RF/Type_3/table_generator.py index 567b808..9ace82b 100755 --- a/src/models/RF/Type_3/table_generator.py +++ b/src/models/RF/Type_3/table_generator.py @@ -1,579 +1,579 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -import time -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" +chr(ord('A')+first_letter)+chr(ord('A')+second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l==0: - text += "f"+chr(ord('A')+first_letter)+chr(ord('A')+second_letter) - else: - text += "|f"+chr(ord('A')+first_letter)+chr(ord('A')+second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f"+chr(ord('A')+first_letter)+chr(ord('A')+second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree '+str(t)] = {} - for idx in Exact_Table['tree '+str(t)]: - if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] - code_table_size += 1 - Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [ Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - table_name = 'Ternary_Table.json' - json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") - - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +import time +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" +chr(ord('A')+first_letter)+chr(ord('A')+second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l==0: + text += "f"+chr(ord('A')+first_letter)+chr(ord('A')+second_letter) + else: + text += "|f"+chr(ord('A')+first_letter)+chr(ord('A')+second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f"+chr(ord('A')+first_letter)+chr(ord('A')+second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree '+str(t)] = {} + for idx in Exact_Table['tree '+str(t)]: + if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] + code_table_size += 1 + Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [ Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + table_name = 'Ternary_Table.json' + json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") + + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/Type_4/dedicated_p4.py b/src/models/RF/Type_4/dedicated_p4.py index 2ead009..19a0cc3 100755 --- a/src/models/RF/Type_4/dedicated_p4.py +++ b/src/models/RF/Type_4/dedicated_p4.py @@ -1,317 +1,317 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + - "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + + "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - + int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/RF/Type_4/readme.md b/src/models/RF/Type_4/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_4/readme.md +++ b/src/models/RF/Type_4/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_4/table_generator.py b/src/models/RF/Type_4/table_generator.py index 8071350..b752e37 100755 --- a/src/models/RF/Type_4/table_generator.py +++ b/src/models/RF/Type_4/table_generator.py @@ -1,586 +1,586 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -import time -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l == 0: - text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - else: - text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - # num_features = len(train_X.keys()) - # num_classes = np.max(train_y)+1 - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree '+str(t)] = {} - for idx in Exact_Table['tree '+str(t)]: - if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] - code_table_size += 1 - Exact_Table['tree '+str(t)] = copy.deepcopy(LPM_Table['tree '+str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [ Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - table_name = 'LPM_Table.json' - json.dump(LPM_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nLPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list)!=str(lpm_code_list): - print('error in exact to ternary match', code_list, lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") - - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('LPM match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +import time +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l == 0: + text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + else: + text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + # num_features = len(train_X.keys()) + # num_classes = np.max(train_y)+1 + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree '+str(t)] = {} + for idx in Exact_Table['tree '+str(t)]: + if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] + code_table_size += 1 + Exact_Table['tree '+str(t)] = copy.deepcopy(LPM_Table['tree '+str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [ Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + table_name = 'LPM_Table.json' + json.dump(LPM_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nLPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list)!=str(lpm_code_list): + print('error in exact to ternary match', code_list, lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") + + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('LPM match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') \ No newline at end of file diff --git a/src/models/RF/Type_5/dedicated_p4.py b/src/models/RF/Type_5/dedicated_p4.py index 2ead009..19a0cc3 100755 --- a/src/models/RF/Type_5/dedicated_p4.py +++ b/src/models/RF/Type_5/dedicated_p4.py @@ -1,317 +1,317 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + - "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + str(((1 << 8) - 1) & (key >> 24)) + "." + str(((1 << 8) - 1) & (key >> 16)) + + "." + str(((1 << 8) - 1) & (key >> 8)) + "." + str(((1 << 8) - 1) & (key)) + "/" + str(32 - + int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/RF/Type_5/readme.md b/src/models/RF/Type_5/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_5/readme.md +++ b/src/models/RF/Type_5/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_5/table_generator.py b/src/models/RF/Type_5/table_generator.py index 1e53b32..8406635 100755 --- a/src/models/RF/Type_5/table_generator.py +++ b/src/models/RF/Type_5/table_generator.py @@ -1,597 +1,597 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -import time -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -from src.functions.Muti_Exact_to_LPM import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - feature_names += ["f" + chr(ord('A') + l)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - if l==0: - text += "f"+chr(ord('A')+l) - else: - text += "|f" + chr(ord('A')+l) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - if feature == "f"+chr(ord('A')+l): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - # num_features = len(train_X.keys()) - # num_classes = np.max(train_y)+1 - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) - - - # ===================== tree table to LPM ========================= - default_vote = 0 - - for t in range(num_trees): - LPM_Table['tree '+str(t)] = {} - print('') - print('Begine transfer: Tree '+str(t)+' table ') - key_name = [] - for f in range(num_features): - key_name += ['f' + str(f) + ' code'] - action_name = 'leaf' - # prepare default - LPM_Table['tree '+str(t)] = Muti_Exact_to_LPM_Concatination(Exact_Table['tree '+str(t)], code_width_tree_feature[t], key_name, action_name) - - # ===================== decision table to LPM ========================= - default_class = 0 - - LPM_Table['decision'] = {} - print('') - print('Begine transfer decision table ') - key_name = [] - for t in range(num_trees): - key_name += ['t' + str(t) + ' vote'] - action_name = 'class' - - decision_table_key_width = [] - for t in range(num_trees): - # decision_table_key_width += [5] - decision_table_key_width += [int(1+np.ceil(math.log(num_classes, 2)))] - - # prepare default - LPM_Table['decision'] = Muti_Exact_to_LPM_Concatination(Exact_Table['decision'], decision_table_key_width, key_name, action_name) - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - table_name = 'LPM_Table.json' - json.dump(LPM_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nLPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["width of decision table keys"] = decision_table_key_width - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - decision_table_key_width = Planter_config['p4 config']["width of decision table keys"] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list)!=str(lpm_code_list): - print('error in exact to lpm match', code_list, lpm_code_list) - - binary_code = '' - for f in range(num_features): - binary_code = binary_code + ten_to_bin(int(code_list[f]), int(Planter_config['p4 config']["width of code"][tree][f])) - decimal_code = int(binary_code, 2) - - LPM_table = LPM_Table['tree '+str(tree)] - keys = list(LPM_table.keys()) - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - vote_list[tree] = action[max_index] - - binary_code = '' - for t in range(num_trees): - binary_code = binary_code + ten_to_bin(int(vote_list[t]), decision_table_key_width[t]) - decimal_code = int(binary_code, 2) - - LPM_table = LPM_Table['decision'] - keys = list(LPM_table.keys()) - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - switch_prediction = action[max_index] - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") - - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - print('Exit, the P4 generator (dedicate P4) is currently unavailable in this variation.') - exit() - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('LPM match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +import time +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +from src.functions.Muti_Exact_to_LPM import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + feature_names += ["f" + chr(ord('A') + l)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + if l==0: + text += "f"+chr(ord('A')+l) + else: + text += "|f" + chr(ord('A')+l) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + if feature == "f"+chr(ord('A')+l): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + # num_features = len(train_X.keys()) + # num_classes = np.max(train_y)+1 + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) + + + # ===================== tree table to LPM ========================= + default_vote = 0 + + for t in range(num_trees): + LPM_Table['tree '+str(t)] = {} + print('') + print('Begine transfer: Tree '+str(t)+' table ') + key_name = [] + for f in range(num_features): + key_name += ['f' + str(f) + ' code'] + action_name = 'leaf' + # prepare default + LPM_Table['tree '+str(t)] = Muti_Exact_to_LPM_Concatination(Exact_Table['tree '+str(t)], code_width_tree_feature[t], key_name, action_name) + + # ===================== decision table to LPM ========================= + default_class = 0 + + LPM_Table['decision'] = {} + print('') + print('Begine transfer decision table ') + key_name = [] + for t in range(num_trees): + key_name += ['t' + str(t) + ' vote'] + action_name = 'class' + + decision_table_key_width = [] + for t in range(num_trees): + # decision_table_key_width += [5] + decision_table_key_width += [int(1+np.ceil(math.log(num_classes, 2)))] + + # prepare default + LPM_Table['decision'] = Muti_Exact_to_LPM_Concatination(Exact_Table['decision'], decision_table_key_width, key_name, action_name) + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + table_name = 'LPM_Table.json' + json.dump(LPM_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nLPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["width of decision table keys"] = decision_table_key_width + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + decision_table_key_width = Planter_config['p4 config']["width of decision table keys"] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list)!=str(lpm_code_list): + print('error in exact to lpm match', code_list, lpm_code_list) + + binary_code = '' + for f in range(num_features): + binary_code = binary_code + ten_to_bin(int(code_list[f]), int(Planter_config['p4 config']["width of code"][tree][f])) + decimal_code = int(binary_code, 2) + + LPM_table = LPM_Table['tree '+str(tree)] + keys = list(LPM_table.keys()) + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + vote_list[tree] = action[max_index] + + binary_code = '' + for t in range(num_trees): + binary_code = binary_code + ten_to_bin(int(vote_list[t]), decision_table_key_width[t]) + decimal_code = int(binary_code, 2) + + LPM_table = LPM_Table['decision'] + keys = list(LPM_table.keys()) + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + switch_prediction = action[max_index] + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") + + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + print('Exit, the P4 generator (dedicate P4) is currently unavailable in this variation.') + exit() + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('LPM match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/Type_DM/dedicated_p4.py b/src/models/RF/Type_DM/dedicated_p4.py index 1808406..8f5210c 100755 --- a/src/models/RF/Type_DM/dedicated_p4.py +++ b/src/models/RF/Type_DM/dedicated_p4.py @@ -1,263 +1,263 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['num_depth'] = config_file["number of depth"] - config['model'] = config_file['model'] - config["decision_table_size"] = config_file["decision table size"] - config['default label'] = config_file["default label"] - config['first_entry'] = config_file["first entry info"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n" - "#define CLASS_NOT_SET 10\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - - # headers.write("#define CLASS_NOT_SET 10\n\n") - # headers.write("struct metadata_t {\n") - for t in range(0, config['num_trees']): - headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") - - for t in range(0, config['num_trees']): - headers.write(" bit<16> node_id"+str(t+1)+";\n" - " bit<16> prevFeature"+str(t+1)+";\n" - " bit<16> isTrue"+str(t+1)+";\n" - " bit<32> th" + str(t+1) + ";\n") - headers.write(" bit<32> DstAddr;\n") - # "}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") - ingress.write("\n") - for t in range(0, config['num_trees']): - ingress.write(" meta.node_id"+str(t+1)+" = "+str(config['first_entry'][t][0])+";\n" - " meta.prevFeature"+str(t+1)+" = "+str(config['first_entry'][t][1])+";\n" - " meta.isTrue"+str(t+1)+" = "+str(config['first_entry'][t][2])+";\n") - for d in range(0, config['num_depth']): - ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n" - " "+d*" "+"if (meta.th"+str(t+1)+" & 0b10000000000000000000000000000000 == 0) meta.isTrue"+str(t+1)+" = 1;\n" - " "+d*" "+"else meta.isTrue"+str(t+1)+" = 0;\n") - ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") - ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") - ingress.write(" " + (config['num_depth']) * "} " + "\n\n") - - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - for f in range(0, config['num_features']): - ingress.write(" action CheckFeature"+str(t+1)+"_"+str(f)+"(bit<16> node_id, bit<32> threshold) {\n" - " meta.th"+str(t+1)+" = threshold - meta.feature"+str(f)+";\n" - " meta.prevFeature"+str(t+1)+" = "+str(f)+";\n" - " meta.node_id"+str(t+1)+" = node_id;\n" - " }\n\n") - - for t in range(0, config['num_trees']): - ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" - " meta.tree_" + str(t+1) + "_vote = class;\n" - " meta.node_id" + str(t+1) + " = node_id; // just for debugging otherwise not needed\n" - " }\n") - - - for t in range(0, config['num_trees']): - for d in range(0, config['num_depth']+1): - ingress.write(" @pragma stage "+str(d+1)+"\n") - ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" - " key = {\n" - " meta.node_id" + str(t+1) + ": exact;\n" - " meta.prevFeature" + str(t+1) + ": exact;\n" - " meta.isTrue" + str(t+1) + ": exact;\n" - " }\n" - " actions = {\n" - " NoAction;\n") - for f in range(0, config['num_features']): - ingress.write(" CheckFeature"+str(t+1)+"_"+str(f)+";\n") - ingress.write(" SetClass"+str(t+1)+";\n" - " }\n" - " size = 1024;\n" - " }\n\n") - - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_command(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Table = json.load(open('Tables/Exact_Table.json', 'r')) - - fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' - - with open(fname_current, 'a') as file: - for idx in Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") - file.write("=> " + str(Table['decision'][idx]['class']) + "\n") - - with open(fname, 'w') as command: - command.write('') - current_file = open(fname_current, 'r') - for line in current_file: - new_file = open(fname, 'a') # Use append mode here - new_file.write(line) - current_file.close() - new_file.close() - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - table = json.load(open('./Tables/Exact_Table.json', 'r')) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - - # Load decision tables - tload.write("print('load table for each nodes')\n") - for idx in table['node table']: - if table['node table'][idx][0] == "CheckFeature": - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][7] - else: - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] - - tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ - '('+key_and_values+')\n') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['num_depth'] = config_file["number of depth"] + config['model'] = config_file['model'] + config["decision_table_size"] = config_file["decision table size"] + config['default label'] = config_file["default label"] + config['first_entry'] = config_file["first entry info"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n" + "#define CLASS_NOT_SET 10\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + + # headers.write("#define CLASS_NOT_SET 10\n\n") + # headers.write("struct metadata_t {\n") + for t in range(0, config['num_trees']): + headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") + + for t in range(0, config['num_trees']): + headers.write(" bit<16> node_id"+str(t+1)+";\n" + " bit<16> prevFeature"+str(t+1)+";\n" + " bit<16> isTrue"+str(t+1)+";\n" + " bit<32> th" + str(t+1) + ";\n") + headers.write(" bit<32> DstAddr;\n") + # "}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") + ingress.write("\n") + for t in range(0, config['num_trees']): + ingress.write(" meta.node_id"+str(t+1)+" = "+str(config['first_entry'][t][0])+";\n" + " meta.prevFeature"+str(t+1)+" = "+str(config['first_entry'][t][1])+";\n" + " meta.isTrue"+str(t+1)+" = "+str(config['first_entry'][t][2])+";\n") + for d in range(0, config['num_depth']): + ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n" + " "+d*" "+"if (meta.th"+str(t+1)+" & 0b10000000000000000000000000000000 == 0) meta.isTrue"+str(t+1)+" = 1;\n" + " "+d*" "+"else meta.isTrue"+str(t+1)+" = 0;\n") + ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") + ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") + ingress.write(" " + (config['num_depth']) * "} " + "\n\n") + + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + for f in range(0, config['num_features']): + ingress.write(" action CheckFeature"+str(t+1)+"_"+str(f)+"(bit<16> node_id, bit<32> threshold) {\n" + " meta.th"+str(t+1)+" = threshold - meta.feature"+str(f)+";\n" + " meta.prevFeature"+str(t+1)+" = "+str(f)+";\n" + " meta.node_id"+str(t+1)+" = node_id;\n" + " }\n\n") + + for t in range(0, config['num_trees']): + ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" + " meta.tree_" + str(t+1) + "_vote = class;\n" + " meta.node_id" + str(t+1) + " = node_id; // just for debugging otherwise not needed\n" + " }\n") + + + for t in range(0, config['num_trees']): + for d in range(0, config['num_depth']+1): + ingress.write(" @pragma stage "+str(d+1)+"\n") + ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" + " key = {\n" + " meta.node_id" + str(t+1) + ": exact;\n" + " meta.prevFeature" + str(t+1) + ": exact;\n" + " meta.isTrue" + str(t+1) + ": exact;\n" + " }\n" + " actions = {\n" + " NoAction;\n") + for f in range(0, config['num_features']): + ingress.write(" CheckFeature"+str(t+1)+"_"+str(f)+";\n") + ingress.write(" SetClass"+str(t+1)+";\n" + " }\n" + " size = 1024;\n" + " }\n\n") + + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_command(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Table = json.load(open('Tables/Exact_Table.json', 'r')) + + fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' + + with open(fname_current, 'a') as file: + for idx in Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") + file.write("=> " + str(Table['decision'][idx]['class']) + "\n") + + with open(fname, 'w') as command: + command.write('') + current_file = open(fname_current, 'r') + for line in current_file: + new_file = open(fname, 'a') # Use append mode here + new_file.write(line) + current_file.close() + new_file.close() + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + table = json.load(open('./Tables/Exact_Table.json', 'r')) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + + # Load decision tables + tload.write("print('load table for each nodes')\n") + for idx in table['node table']: + if table['node table'][idx][0] == "CheckFeature": + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][7] + else: + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + + tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ + '('+key_and_values+')\n') diff --git a/src/models/RF/Type_DM/readme.md b/src/models/RF/Type_DM/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_DM/readme.md +++ b/src/models/RF/Type_DM/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_DM/table_generator.py b/src/models/RF/Type_DM/table_generator.py index d650f94..0d79aa5 100755 --- a/src/models/RF/Type_DM/table_generator.py +++ b/src/models/RF/Type_DM/table_generator.py @@ -1,379 +1,379 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh -# If you want to use this type of model, -# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -from sklearn import tree -from sklearn.tree import export_text -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier - -# i_tree = 0 -# -# global_id = 0 - - -def export_p4(decision_tree, fname): - tree_ = decision_tree.tree_ - class_names = decision_tree.classes_ - right_child_fmt = "{} {} <= {}\n" - left_child_fmt = "{} {} > {}\n" - truncation_fmt = "{} {}\n" - feature_names_ = ["{}".format(i) for i in tree_.feature] - export_text.report = "" - max_depth = 10 - spacing = 3 - decimals = 2 - show_weights = False - - if isinstance(decision_tree, DecisionTreeClassifier): - value_fmt = "{}{} weights: {}\n" - if not show_weights: - value_fmt = "{}{}{}\n" - else: - value_fmt = "{}{} value: {}\n" - - def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): - global global_id - global i_tree - global Exact_Table - - current_id = global_id - - val = '' - is_classification = isinstance(decision_tree, - DecisionTreeClassifier) - if show_weights or not is_classification: - val = ["{1:.{0}f}, ".format(decimals, v) for v in value] - val = '[' + ''.join(val)[:-2] + ']' - if is_classification: - val += ' class: ' + str(class_name) - export_text.report += value_fmt.format(indent, '', val) - # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, - # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ - " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ - str(int(float(class_name))) +"\n") - - Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), - "level_" + str(i_tree) + "_" + str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), - str(int(float(class_name)))] - Exact_Table['node table counter'] += 1 - - - - - - def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): - indent = ("|" + (" " * spacing)) * depth - indent = indent[:-spacing] + "-" * spacing - global global_id - global i_tree - global Exact_Table - - global_id = global_id + 1 - current_id = global_id - - value = None - if tree_.n_outputs == 1: - value = tree_.value[node][0] - else: - value = tree_.value[node].T[0] - class_name = np.argmax(value) - - if (tree_.n_classes[0] != 1 and - tree_.n_outputs == 1): - class_name = class_names[class_name] - - if depth <= max_depth + 1: - info_fmt = "" - info_fmt_left = info_fmt - info_fmt_right = info_fmt - - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_names_[node] - threshold = tree_.threshold[node] - threshold = "{1:.{0}f}".format(decimals, threshold) - export_text.report += right_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_left - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature" - +str(i_tree)+"_"+ str(name)+ " "+ str(previous_id) + " " + str(prevfeature) + " "+ - str(result) + " => " + str(current_id) + " " + str(int(float(threshold)))+"\n") - global first_entry - global entry_info - global Exact_Table - - Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature"+str(i_tree)+"_"+ str(name), - "level_"+str(i_tree)+ "_"+str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), str(name) , - str(int(float(threshold)))] - Exact_Table['node table counter'] += 1 - - if first_entry: - first_entry = False - entry_info += [[previous_id, prevfeature, result]] - - print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) - - export_text.report += left_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_right - # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) - - print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) - else: # leaf - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - subtree_depth = _compute_depth(tree_, node) - if subtree_depth == 1: - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - trunc_report = 'truncated branch of depth %d' % subtree_depth - export_text.report += truncation_fmt.format(indent, - trunc_report) - - print_tree_recurse(0, 1, 0, 1, global_id, fname) - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 3) ') or '3') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 2) ') or '2') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 10000) ') or '10000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - # Random Forest - - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' - # refresh the command (Table) file - with open(fname, 'w') as command: - command.write('') - - global global_id - global i_tree - global first_entry - global entry_info - global Exact_Table - - i_tree = 0 - global_id = 0 - entry_info = [] - Exact_Table = {} - Exact_Table['node table'] = {} - Exact_Table['node table counter'] = 0 - - for idx, estimator in enumerate(rfc.estimators_): - with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: - f.write('') - with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: - get_lineage(estimator, feature_names, f) - first_entry = True - i_tree = i_tree + 1 - export_p4(estimator, fname) - # print(entry_info) - - g_table = {} - print("Generating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - g_table['decision'] = g_table['votes to class'] - - collect_class = [] - for idx in g_table['decision']: - collect_class += [g_table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Exact_Table['decision'] = {} - for idx in g_table['decision']: - if g_table['decision'][idx]['class'] != default_class: - Exact_Table['decision'][code_table_size] = g_table['decision'][idx] - code_table_size += 1 - - - - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Depth_Based_Table.txt and Exact_Table.json is generated') - - - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']["number of depth"] = num_depth - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["first entry info"] = entry_info - Planter_config['p4 config']["default label"] = default_class - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - print('The python simulation test does not support this model, please do the following emulation test on the software switch.') - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh +# If you want to use this type of model, +# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +from sklearn import tree +from sklearn.tree import export_text +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier + +# i_tree = 0 +# +# global_id = 0 + + +def export_p4(decision_tree, fname): + tree_ = decision_tree.tree_ + class_names = decision_tree.classes_ + right_child_fmt = "{} {} <= {}\n" + left_child_fmt = "{} {} > {}\n" + truncation_fmt = "{} {}\n" + feature_names_ = ["{}".format(i) for i in tree_.feature] + export_text.report = "" + max_depth = 10 + spacing = 3 + decimals = 2 + show_weights = False + + if isinstance(decision_tree, DecisionTreeClassifier): + value_fmt = "{}{} weights: {}\n" + if not show_weights: + value_fmt = "{}{}{}\n" + else: + value_fmt = "{}{} value: {}\n" + + def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): + global global_id + global i_tree + global Exact_Table + + current_id = global_id + + val = '' + is_classification = isinstance(decision_tree, + DecisionTreeClassifier) + if show_weights or not is_classification: + val = ["{1:.{0}f}, ".format(decimals, v) for v in value] + val = '[' + ''.join(val)[:-2] + ']' + if is_classification: + val += ' class: ' + str(class_name) + export_text.report += value_fmt.format(indent, '', val) + # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, + # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ + " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ + str(int(float(class_name))) +"\n") + + Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), + "level_" + str(i_tree) + "_" + str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), + str(int(float(class_name)))] + Exact_Table['node table counter'] += 1 + + + + + + def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): + indent = ("|" + (" " * spacing)) * depth + indent = indent[:-spacing] + "-" * spacing + global global_id + global i_tree + global Exact_Table + + global_id = global_id + 1 + current_id = global_id + + value = None + if tree_.n_outputs == 1: + value = tree_.value[node][0] + else: + value = tree_.value[node].T[0] + class_name = np.argmax(value) + + if (tree_.n_classes[0] != 1 and + tree_.n_outputs == 1): + class_name = class_names[class_name] + + if depth <= max_depth + 1: + info_fmt = "" + info_fmt_left = info_fmt + info_fmt_right = info_fmt + + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_names_[node] + threshold = tree_.threshold[node] + threshold = "{1:.{0}f}".format(decimals, threshold) + export_text.report += right_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_left + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature" + +str(i_tree)+"_"+ str(name)+ " "+ str(previous_id) + " " + str(prevfeature) + " "+ + str(result) + " => " + str(current_id) + " " + str(int(float(threshold)))+"\n") + global first_entry + global entry_info + global Exact_Table + + Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature"+str(i_tree)+"_"+ str(name), + "level_"+str(i_tree)+ "_"+str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), str(name) , + str(int(float(threshold)))] + Exact_Table['node table counter'] += 1 + + if first_entry: + first_entry = False + entry_info += [[previous_id, prevfeature, result]] + + print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) + + export_text.report += left_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_right + # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) + + print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) + else: # leaf + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + subtree_depth = _compute_depth(tree_, node) + if subtree_depth == 1: + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + trunc_report = 'truncated branch of depth %d' % subtree_depth + export_text.report += truncation_fmt.format(indent, + trunc_report) + + print_tree_recurse(0, 1, 0, 1, global_id, fname) + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 3) ') or '3') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 2) ') or '2') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 10000) ') or '10000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + # Random Forest + + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' + # refresh the command (Table) file + with open(fname, 'w') as command: + command.write('') + + global global_id + global i_tree + global first_entry + global entry_info + global Exact_Table + + i_tree = 0 + global_id = 0 + entry_info = [] + Exact_Table = {} + Exact_Table['node table'] = {} + Exact_Table['node table counter'] = 0 + + for idx, estimator in enumerate(rfc.estimators_): + with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: + f.write('') + with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: + get_lineage(estimator, feature_names, f) + first_entry = True + i_tree = i_tree + 1 + export_p4(estimator, fname) + # print(entry_info) + + g_table = {} + print("Generating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + g_table['decision'] = g_table['votes to class'] + + collect_class = [] + for idx in g_table['decision']: + collect_class += [g_table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Exact_Table['decision'] = {} + for idx in g_table['decision']: + if g_table['decision'][idx]['class'] != default_class: + Exact_Table['decision'][code_table_size] = g_table['decision'][idx] + code_table_size += 1 + + + + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Depth_Based_Table.txt and Exact_Table.json is generated') + + + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']["number of depth"] = num_depth + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["first entry info"] = entry_info + Planter_config['p4 config']["default label"] = default_class + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + print('The python simulation test does not support this model, please do the following emulation test on the software switch.') + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/Type_DM_bmv2_only/dedicated_p4.py b/src/models/RF/Type_DM_bmv2_only/dedicated_p4.py index 844aa1f..f9609c8 100755 --- a/src/models/RF/Type_DM_bmv2_only/dedicated_p4.py +++ b/src/models/RF/Type_DM_bmv2_only/dedicated_p4.py @@ -1,273 +1,273 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['num_depth'] = config_file["number of depth"] - config['model'] = config_file['model'] - config["decision_table_size"] = config_file["decision table size"] - config['default label'] = config_file["default label"] - config['first_entry'] = config_file["first entry info"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n" - "#define CLASS_NOT_SET 10\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - - # headers.write("#define CLASS_NOT_SET 10\n\n") - # headers.write("struct metadata_t {\n") - for t in range(0, config['num_trees']): - headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") - headers.write(" bit<16> node_id;\n" - " bit<16> prevFeature;\n" - " bit<16> isTrue;\n" - " bit<32> DstAddr;\n") - # "}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") - ingress.write("\n") - for t in range(0, config['num_trees']): - ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" - " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" - " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") - for d in range(0, config['num_depth']): - ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") - ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") - ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") - ingress.write(" " + (config['num_depth']) * "} " + "\n\n") - - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" - " bit<32> feature = 0;\n" - # " bit<32> th = threshold ;\n" - " bit<16> f = f_inout ;\n") - for f in range(0, config['num_features']): - ingress.write(" if (f == "+str(f)+") {\n" - " feature = hdr.Planter.feature"+str(f)+";\n" - " }\n") - ingress.write(" bit<32> th = threshold - feature;\n" - # " if (feature <= th){\n" # if (feature <= th){ - " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ - " meta.isTrue = 1;\n" - " }else{\n" - " meta.isTrue = 0;\n" - " }\n" - " meta.prevFeature = f;\n" - " meta.node_id = node_id;\n") - - ingress.write(" }\n\n") - - for t in range(0, config['num_trees']): - ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" - " meta.tree_" + str(t+1) + "_vote = class;\n" - " meta.node_id = node_id; // just for debugging otherwise not needed\n" - " }\n") - - - for t in range(0, config['num_trees']): - for d in range(0, config['num_depth']+1): - ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" - " key = {\n" - " meta.node_id: exact;\n" - " meta.prevFeature: exact;\n" - " meta.isTrue: exact;\n" - " }\n" - " actions = {\n" - " NoAction;\n" - " CheckFeature;\n" - " SetClass"+str(t+1)+";\n" - " }\n" - " size = 1024;\n" - " }\n\n") - - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_command(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Table = json.load(open('Tables/Exact_Table.json', 'r')) - - fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' - total_entries = 0 - with open(fname_current, 'a') as file: - for idx in Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") - file.write("=> " + str(Table['decision'][idx]['class']) + "\n") - total_entries += 1 - - with open(fname, 'w') as command: - command.write('') - current_file = open(fname_current, 'r') - - for line in current_file: - new_file = open(fname, 'a') # Use append mode here - new_file.write(line) - total_entries += 1 - print('Actual exact table entries:', total_entries, '...', end='') - current_file.close() - new_file.close() - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - table = json.load(open('./Tables/Exact_Table.json', 'r')) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - - # Load decision tables - tload.write("print('load table for each nodes')\n") - for idx in table['node table']: - if table['node table'][idx][0] == "CheckFeature": - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] + ', ' + table['node table'][idx][7] - else: - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] - - tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ - '('+key_and_values+')\n') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['num_depth'] = config_file["number of depth"] + config['model'] = config_file['model'] + config["decision_table_size"] = config_file["decision table size"] + config['default label'] = config_file["default label"] + config['first_entry'] = config_file["first entry info"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n" + "#define CLASS_NOT_SET 10\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + + # headers.write("#define CLASS_NOT_SET 10\n\n") + # headers.write("struct metadata_t {\n") + for t in range(0, config['num_trees']): + headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") + headers.write(" bit<16> node_id;\n" + " bit<16> prevFeature;\n" + " bit<16> isTrue;\n" + " bit<32> DstAddr;\n") + # "}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") + ingress.write("\n") + for t in range(0, config['num_trees']): + ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" + " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" + " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") + for d in range(0, config['num_depth']): + ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") + ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") + ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") + ingress.write(" " + (config['num_depth']) * "} " + "\n\n") + + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" + " bit<32> feature = 0;\n" + # " bit<32> th = threshold ;\n" + " bit<16> f = f_inout ;\n") + for f in range(0, config['num_features']): + ingress.write(" if (f == "+str(f)+") {\n" + " feature = hdr.Planter.feature"+str(f)+";\n" + " }\n") + ingress.write(" bit<32> th = threshold - feature;\n" + # " if (feature <= th){\n" # if (feature <= th){ + " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ + " meta.isTrue = 1;\n" + " }else{\n" + " meta.isTrue = 0;\n" + " }\n" + " meta.prevFeature = f;\n" + " meta.node_id = node_id;\n") + + ingress.write(" }\n\n") + + for t in range(0, config['num_trees']): + ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" + " meta.tree_" + str(t+1) + "_vote = class;\n" + " meta.node_id = node_id; // just for debugging otherwise not needed\n" + " }\n") + + + for t in range(0, config['num_trees']): + for d in range(0, config['num_depth']+1): + ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" + " key = {\n" + " meta.node_id: exact;\n" + " meta.prevFeature: exact;\n" + " meta.isTrue: exact;\n" + " }\n" + " actions = {\n" + " NoAction;\n" + " CheckFeature;\n" + " SetClass"+str(t+1)+";\n" + " }\n" + " size = 1024;\n" + " }\n\n") + + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_command(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Table = json.load(open('Tables/Exact_Table.json', 'r')) + + fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' + total_entries = 0 + with open(fname_current, 'a') as file: + for idx in Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") + file.write("=> " + str(Table['decision'][idx]['class']) + "\n") + total_entries += 1 + + with open(fname, 'w') as command: + command.write('') + current_file = open(fname_current, 'r') + + for line in current_file: + new_file = open(fname, 'a') # Use append mode here + new_file.write(line) + total_entries += 1 + print('Actual exact table entries:', total_entries, '...', end='') + current_file.close() + new_file.close() + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + table = json.load(open('./Tables/Exact_Table.json', 'r')) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + + # Load decision tables + tload.write("print('load table for each nodes')\n") + for idx in table['node table']: + if table['node table'][idx][0] == "CheckFeature": + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + ', ' + table['node table'][idx][7] + else: + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + + tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ + '('+key_and_values+')\n') diff --git a/src/models/RF/Type_DM_bmv2_only/readme.md b/src/models/RF/Type_DM_bmv2_only/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_DM_bmv2_only/readme.md +++ b/src/models/RF/Type_DM_bmv2_only/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_DM_bmv2_only/table_generator.py b/src/models/RF/Type_DM_bmv2_only/table_generator.py index 17598d8..18ca69f 100755 --- a/src/models/RF/Type_DM_bmv2_only/table_generator.py +++ b/src/models/RF/Type_DM_bmv2_only/table_generator.py @@ -1,394 +1,394 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh -# If you want to use this type of model, -# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os -import time -from sklearn import tree -from sklearn.tree import export_text -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier - -# i_tree = 0 -# -# global_id = 0 - - -def export_p4(decision_tree, fname): - tree_ = decision_tree.tree_ - class_names = decision_tree.classes_ - right_child_fmt = "{} {} <= {}\n" - left_child_fmt = "{} {} > {}\n" - truncation_fmt = "{} {}\n" - feature_names_ = ["{}".format(i) for i in tree_.feature] - export_text.report = "" - max_depth = 10 - spacing = 3 - decimals = 2 - show_weights = False - - if isinstance(decision_tree, DecisionTreeClassifier): - value_fmt = "{}{} weights: {}\n" - if not show_weights: - value_fmt = "{}{}{}\n" - else: - value_fmt = "{}{} value: {}\n" - - def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): - global global_id - global i_tree - global Exact_Table - - current_id = global_id - - val = '' - is_classification = isinstance(decision_tree, - DecisionTreeClassifier) - if show_weights or not is_classification: - val = ["{1:.{0}f}, ".format(decimals, v) for v in value] - val = '[' + ''.join(val)[:-2] + ']' - if is_classification: - val += ' class: ' + str(class_name) - export_text.report += value_fmt.format(indent, '', val) - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ - " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ - str(int(float(class_name))) +"\n") - - Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), - "level_" + str(i_tree) + "_" + str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), - str(int(float(class_name)))] - Exact_Table['node table counter'] += 1 - - - - - - def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): - indent = ("|" + (" " * spacing)) * depth - indent = indent[:-spacing] + "-" * spacing - global global_id - global i_tree - global Exact_Table - - global_id = global_id + 1 - current_id = global_id - - value = None - if tree_.n_outputs == 1: - value = tree_.value[node][0] - else: - value = tree_.value[node].T[0] - class_name = np.argmax(value) - - if (tree_.n_classes[0] != 1 and - tree_.n_outputs == 1): - class_name = class_names[class_name] - - if depth <= max_depth + 1: - info_fmt = "" - info_fmt_left = info_fmt - info_fmt_right = info_fmt - - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_names_[node] - threshold = tree_.threshold[node] - threshold = "{1:.{0}f}".format(decimals, threshold) - export_text.report += right_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_left - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ - str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + - " " + str(name) + " " + str(int(float(threshold)))+"\n") - global first_entry - global entry_info - global Exact_Table - - Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), str(name) , - str(int(float(threshold)))] - Exact_Table['node table counter'] += 1 - - if first_entry: - first_entry = False - entry_info += [[previous_id, prevfeature, result]] - - print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) - - export_text.report += left_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_right - # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) - - print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) - else: # leaf - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - subtree_depth = _compute_depth(tree_, node) - if subtree_depth == 1: - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - trunc_report = 'truncated branch of depth %d' % subtree_depth - export_text.report += truncation_fmt.format(indent, - trunc_report) - - print_tree_recurse(0, 1, 0, 1, global_id, fname) - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' - # refresh the command (Table) file - with open(fname, 'w') as command: - command.write('') - - global global_id - global i_tree - global first_entry - global entry_info - global Exact_Table - - i_tree = 0 - global_id = 0 - entry_info = [] - Exact_Table = {} - Exact_Table['node table'] = {} - Exact_Table['node table counter'] = 0 - - for idx, estimator in enumerate(rfc.estimators_): - with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: - f.write('') - with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: - get_lineage(estimator, feature_names, f) - first_entry = True - i_tree = i_tree + 1 - export_p4(estimator, fname) - # print(entry_info) - - g_table = {} - print("Generating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - g_table['decision'] = g_table['votes to class'] - - collect_class = [] - for idx in g_table['decision']: - collect_class += [g_table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Exact_Table['decision'] = {} - for idx in g_table['decision']: - if g_table['decision'][idx]['class'] != default_class: - Exact_Table['decision'][code_table_size] = g_table['decision'][idx] - code_table_size += 1 - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Depth_Based_Table.txt and Exact_Table.json is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']["number of depth"] = num_depth - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["first entry info"] = entry_info - Planter_config['p4 config']["default label"] = default_class - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - print('The python simulation test does not support this model, please do the following emulation test on the software switch.') - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of trees"]*Planter_config['p4 config']["number of depth"]) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh +# If you want to use this type of model, +# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os +import time +from sklearn import tree +from sklearn.tree import export_text +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier + +# i_tree = 0 +# +# global_id = 0 + + +def export_p4(decision_tree, fname): + tree_ = decision_tree.tree_ + class_names = decision_tree.classes_ + right_child_fmt = "{} {} <= {}\n" + left_child_fmt = "{} {} > {}\n" + truncation_fmt = "{} {}\n" + feature_names_ = ["{}".format(i) for i in tree_.feature] + export_text.report = "" + max_depth = 10 + spacing = 3 + decimals = 2 + show_weights = False + + if isinstance(decision_tree, DecisionTreeClassifier): + value_fmt = "{}{} weights: {}\n" + if not show_weights: + value_fmt = "{}{}{}\n" + else: + value_fmt = "{}{} value: {}\n" + + def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): + global global_id + global i_tree + global Exact_Table + + current_id = global_id + + val = '' + is_classification = isinstance(decision_tree, + DecisionTreeClassifier) + if show_weights or not is_classification: + val = ["{1:.{0}f}, ".format(decimals, v) for v in value] + val = '[' + ''.join(val)[:-2] + ']' + if is_classification: + val += ' class: ' + str(class_name) + export_text.report += value_fmt.format(indent, '', val) + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ + " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ + str(int(float(class_name))) +"\n") + + Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), + "level_" + str(i_tree) + "_" + str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), + str(int(float(class_name)))] + Exact_Table['node table counter'] += 1 + + + + + + def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): + indent = ("|" + (" " * spacing)) * depth + indent = indent[:-spacing] + "-" * spacing + global global_id + global i_tree + global Exact_Table + + global_id = global_id + 1 + current_id = global_id + + value = None + if tree_.n_outputs == 1: + value = tree_.value[node][0] + else: + value = tree_.value[node].T[0] + class_name = np.argmax(value) + + if (tree_.n_classes[0] != 1 and + tree_.n_outputs == 1): + class_name = class_names[class_name] + + if depth <= max_depth + 1: + info_fmt = "" + info_fmt_left = info_fmt + info_fmt_right = info_fmt + + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_names_[node] + threshold = tree_.threshold[node] + threshold = "{1:.{0}f}".format(decimals, threshold) + export_text.report += right_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_left + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ + str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + + " " + str(name) + " " + str(int(float(threshold)))+"\n") + global first_entry + global entry_info + global Exact_Table + + Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), str(name) , + str(int(float(threshold)))] + Exact_Table['node table counter'] += 1 + + if first_entry: + first_entry = False + entry_info += [[previous_id, prevfeature, result]] + + print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) + + export_text.report += left_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_right + # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) + + print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) + else: # leaf + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + subtree_depth = _compute_depth(tree_, node) + if subtree_depth == 1: + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + trunc_report = 'truncated branch of depth %d' % subtree_depth + export_text.report += truncation_fmt.format(indent, + trunc_report) + + print_tree_recurse(0, 1, 0, 1, global_id, fname) + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' + # refresh the command (Table) file + with open(fname, 'w') as command: + command.write('') + + global global_id + global i_tree + global first_entry + global entry_info + global Exact_Table + + i_tree = 0 + global_id = 0 + entry_info = [] + Exact_Table = {} + Exact_Table['node table'] = {} + Exact_Table['node table counter'] = 0 + + for idx, estimator in enumerate(rfc.estimators_): + with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: + f.write('') + with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: + get_lineage(estimator, feature_names, f) + first_entry = True + i_tree = i_tree + 1 + export_p4(estimator, fname) + # print(entry_info) + + g_table = {} + print("Generating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + g_table['decision'] = g_table['votes to class'] + + collect_class = [] + for idx in g_table['decision']: + collect_class += [g_table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Exact_Table['decision'] = {} + for idx in g_table['decision']: + if g_table['decision'][idx]['class'] != default_class: + Exact_Table['decision'][code_table_size] = g_table['decision'][idx] + code_table_size += 1 + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Depth_Based_Table.txt and Exact_Table.json is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']["number of depth"] = num_depth + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["first entry info"] = entry_info + Planter_config['p4 config']["default label"] = default_class + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + print('The python simulation test does not support this model, please do the following emulation test on the software switch.') + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of trees"]*Planter_config['p4 config']["number of depth"]) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/Type_EB/dedicated_p4.py b/src/models/RF/Type_EB/dedicated_p4.py index 602d8b7..da5e638 100755 --- a/src/models/RF/Type_EB/dedicated_p4.py +++ b/src/models/RF/Type_EB/dedicated_p4.py @@ -1,311 +1,311 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/RF/Type_EB/readme.md b/src/models/RF/Type_EB/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_EB/readme.md +++ b/src/models/RF/Type_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_EB/table_generator.py b/src/models/RF/Type_EB/table_generator.py index b9dee96..84b6f0c 100755 --- a/src/models/RF/Type_EB/table_generator.py +++ b/src/models/RF/Type_EB/table_generator.py @@ -1,586 +1,586 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -import time -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(int(feature_max[i]+1)): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l == 0: - text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - else: - text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree '+str(t)] = {} - for idx in Exact_Table['tree '+str(t)]: - if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] - code_table_size += 1 - Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [ Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - table_name = 'Ternary_Table.json' - json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree '+str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature '+str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") - - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +import time +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(int(feature_max[i]+1)): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l == 0: + text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + else: + text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 5) ') or '5') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree '+str(t)] = {} + for idx in Exact_Table['tree '+str(t)]: + if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] + code_table_size += 1 + Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [ Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + table_name = 'Ternary_Table.json' + json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree '+str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature '+str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") + + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/Type_EB_auto/dedicated_p4.py b/src/models/RF/Type_EB_auto/dedicated_p4.py index 3e49fff..ba5a0ab 100755 --- a/src/models/RF/Type_EB_auto/dedicated_p4.py +++ b/src/models/RF/Type_EB_auto/dedicated_p4.py @@ -1,323 +1,323 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - # Load feature tables - # for i in range(0, config['num_features']): - # tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( - # i) + "'].keys()),'entries')\n" \ - # "for key in table['feature " + str(i) + "']:\n") - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - # tload.write(" codes = ten_to_bin(int(table['feature " + str( - # i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - # - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - # tload.write(" " + (config['debug_load_table'] * "# ") + \ - # "Ingress.lookup_feature" + str(i) + \ - # ".add_with_extract_feature" + str(i) + \ - # "(int(key), int(codes,2))\n") - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + # Load feature tables + # for i in range(0, config['num_features']): + # tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( + # i) + "'].keys()),'entries')\n" \ + # "for key in table['feature " + str(i) + "']:\n") + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + # tload.write(" codes = ten_to_bin(int(table['feature " + str( + # i) + "'][key][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + # + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + # tload.write(" " + (config['debug_load_table'] * "# ") + \ + # "Ingress.lookup_feature" + str(i) + \ + # ".add_with_extract_feature" + str(i) + \ + # "(int(key), int(codes,2))\n") + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/RF/Type_EB_auto/readme.md b/src/models/RF/Type_EB_auto/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_EB_auto/readme.md +++ b/src/models/RF/Type_EB_auto/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_EB_auto/table_generator.py b/src/models/RF/Type_EB_auto/table_generator.py index 2d08009..327c406 100755 --- a/src/models/RF/Type_EB_auto/table_generator.py +++ b/src/models/RF/Type_EB_auto/table_generator.py @@ -1,601 +1,601 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import time -import math -import re -import json -import copy -import os - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -from sklearn.metrics import * - -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -from src.functions.config_modification import * -from src.functions.input_CLI import * - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - - -def print_tree(tree, feature_names): - tree_ = tree.tree_ - feature_name = [ - feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" - for i in tree_.feature - ] - # print('feature_name:', feature_name) - print("def tree({}):".format(", ".join(feature_names))) - share = {} - def recurse(node, depth, share): - indent = " " * depth - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_name[node] - share[name] = {} - threshold = tree_.threshold[node] - print("{}if {} <= {}:".format(indent, name, threshold)) - recurse(tree_.children_left[node], depth + 1, share) - print("{}else: # if {} > {}".format(indent, name, threshold)) - recurse(tree_.children_right[node], depth + 1, share) - else: - print("{}return {}".format(indent, tree_.value[node])) - recurse(0, 1, share) - - - - -def ten_to_bin(num, count): - num = bin(int(num)).lstrip('0b') - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - - - -def find_feature_split(model, tree_index, num_features): - feature_names = [] - feature_split = {} - for l in range(num_features): - feature_split["feature "+str(l)] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] - threshold = model.tree_.threshold - features = [feature_names[i] for i in model.tree_.feature] - for i, fe in enumerate(features): - for l in range(num_features): - if l == 0: - if fe == feature_names[l]: - feature_split["feature "+str(l)].append(threshold[i]) - continue - if fe == feature_names[l]: - if threshold[i] != -2.0: - feature_split["feature "+str(l)].append(threshold[i]) - continue - for l in range(num_features): - feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] - feature_split["feature "+str(l)].sort() - tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") - for l in range(num_features): - tree.write(str(feature_names[l]) + " = ") - tree.write(str(feature_split["feature "+str(l)])) - tree.write(";\n") - # print_tree(model, feature_names) - get_lineage(model, feature_names, tree) - tree.close() - action = [0, 1] - textfile = 'src/temp/tree'+str(tree_index)+'.txt' - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return textfile, feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(int(feature_max[i]+1)): - if nife !=[] : - if len(nife) > count_code: - if j-1 == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - - -def find_classification(textfile, feature_split, num_features): - fea = [] - sign = [] - num = [] - f = open(textfile, 'r') - feature_n = {} - text = r"(" - for l in range(num_features): - feature_n[l] = [] - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if l == 0: - text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - else: - text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) - text += ")" - for line in f: - n = re.findall(r"when", line) - if n: - fea.append(re.findall(text, line)) - sign.append(re.findall(r"(<=|>)", line)) - num.append(re.findall(r"\d+\.?\d*", line)) - f.close() - classfication = [] - featuren = {} - for i in range(len(fea)): - for l in range(num_features): - featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] - for j, feature in enumerate(fea[i]): - for l in range(num_features): - first_letter = int(np.floor(l / 24)) - second_letter = int(l % 24) - if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): - sig = sign[i][j] - thres = int(float(num[i][j])) - id = feature_split["feature "+str(l)].index(thres) - if sig == '<=': - while id < len(feature_split["feature "+str(l)]): - if id + 1 in featuren[l]: - featuren[l].remove(id + 1) - id = id + 1 - else: - while id >= 0: - if id in featuren[l]: - featuren[l].remove(id) - id = id - 1 - continue - for l in range(num_features): - feature_n[l].append(featuren[l]) - a = len(num[i]) - classfication.append(num[i][a - 1]) - - return feature_n, classfication - - -def find_path_for_leaf_nodes(feature_n, classfication, num_features): - path_to_leaf = {} - for i in range(len(classfication)): - path_to_leaf["path "+str(i)] = {} - path_to_leaf["path " + str(i)]["leaf"] = classfication[i] - for j in range(num_features): - path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] - return path_to_leaf - - - - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model, tree_index, num_features, g_table, feature_max): - textfile, feature_split = find_feature_split(model, tree_index, num_features) - - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - - feature_n, classfication = find_classification(textfile, feature_split , num_features) - path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - - Planter_config = reload_config('src/configs/Planter_config.json') - - # =================== set tree numbers in config =================== - question = 'Number of trees?' - default = 5 - Planter_config = take_CLI_input(Planter_config, 'model config', 'number of trees', question, default, - manually_input = False, numeric=True) - - # =================== set depth in config =================== - question = 'Number of depth?' - default = 4 - Planter_config = take_CLI_input(Planter_config, 'model config', 'number of depth', question, default, - manually_input = False, numeric=True) - - # =================== set max leaf node in config =================== - question = 'Number of leaf nodes?' - default = 1000 - Planter_config = take_CLI_input(Planter_config, 'model config', 'max number of leaf nodes', question, default, - manually_input = False, numeric=True) - - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - g_table = {} - for idx, estimator in enumerate(rfc.estimators_): - g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) - - print("\nGenerating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - - feature_width = [] - for max_f in feature_max: - feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' +str (i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree '+str(t)]: - collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree '+str(t)] = {} - for idx in Exact_Table['tree '+str(t)]: - if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] - code_table_size += 1 - Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) - - # ===================== prepare default class ========================= - - collect_class = [] - for idx in Exact_Table['decision']: - collect_class += [ Exact_Table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - table_name = 'Ternary_Table.json' - json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) - print('\nTernary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys())+1 - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree '+str(tree)].keys())+1] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature '+str(i)].keys())+1] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] - if 'test config' not in Planter_config.keys(): - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - # dump the config file - dump_config(Planter_config, 'src/configs/Planter_config.json') - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of trees'] - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - # print('decision(vote to class) table not matched', vote_list) - switch_prediction = Planter_config['p4 config']["default label"] - # print(test_y) - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") - - - print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import time +import math +import re +import json +import copy +import os + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import * + +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +from src.functions.config_modification import * +from src.functions.input_CLI import * + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + + +def print_tree(tree, feature_names): + tree_ = tree.tree_ + feature_name = [ + feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!" + for i in tree_.feature + ] + # print('feature_name:', feature_name) + print("def tree({}):".format(", ".join(feature_names))) + share = {} + def recurse(node, depth, share): + indent = " " * depth + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_name[node] + share[name] = {} + threshold = tree_.threshold[node] + print("{}if {} <= {}:".format(indent, name, threshold)) + recurse(tree_.children_left[node], depth + 1, share) + print("{}else: # if {} > {}".format(indent, name, threshold)) + recurse(tree_.children_right[node], depth + 1, share) + else: + print("{}return {}".format(indent, tree_.value[node])) + recurse(0, 1, share) + + + + +def ten_to_bin(num, count): + num = bin(int(num)).lstrip('0b') + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + + + +def find_feature_split(model, tree_index, num_features): + feature_names = [] + feature_split = {} + for l in range(num_features): + feature_split["feature "+str(l)] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + feature_names += ["f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter)] + threshold = model.tree_.threshold + features = [feature_names[i] for i in model.tree_.feature] + for i, fe in enumerate(features): + for l in range(num_features): + if l == 0: + if fe == feature_names[l]: + feature_split["feature "+str(l)].append(threshold[i]) + continue + if fe == feature_names[l]: + if threshold[i] != -2.0: + feature_split["feature "+str(l)].append(threshold[i]) + continue + for l in range(num_features): + feature_split["feature "+str(l)] = [int(np.floor(i)) for i in feature_split["feature "+str(l)]] + feature_split["feature "+str(l)].sort() + tree = open('src/temp/tree'+str(tree_index)+'.txt', "w+") + for l in range(num_features): + tree.write(str(feature_names[l]) + " = ") + tree.write(str(feature_split["feature "+str(l)])) + tree.write(";\n") + # print_tree(model, feature_names) + get_lineage(model, feature_names, tree) + tree.close() + action = [0, 1] + textfile = 'src/temp/tree'+str(tree_index)+'.txt' + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return textfile, feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(int(feature_max[i]+1)): + if nife !=[] : + if len(nife) > count_code: + if j-1 == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + + +def find_classification(textfile, feature_split, num_features): + fea = [] + sign = [] + num = [] + f = open(textfile, 'r') + feature_n = {} + text = r"(" + for l in range(num_features): + feature_n[l] = [] + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if l == 0: + text += "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + else: + text += "|f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter) + text += ")" + for line in f: + n = re.findall(r"when", line) + if n: + fea.append(re.findall(text, line)) + sign.append(re.findall(r"(<=|>)", line)) + num.append(re.findall(r"\d+\.?\d*", line)) + f.close() + classfication = [] + featuren = {} + for i in range(len(fea)): + for l in range(num_features): + featuren[l] = [k for k in range(len(feature_split["feature "+str(l)]) + 1)] + for j, feature in enumerate(fea[i]): + for l in range(num_features): + first_letter = int(np.floor(l / 24)) + second_letter = int(l % 24) + if feature == "f" + chr(ord('A') + first_letter) + chr(ord('A') + second_letter): + sig = sign[i][j] + thres = int(float(num[i][j])) + id = feature_split["feature "+str(l)].index(thres) + if sig == '<=': + while id < len(feature_split["feature "+str(l)]): + if id + 1 in featuren[l]: + featuren[l].remove(id + 1) + id = id + 1 + else: + while id >= 0: + if id in featuren[l]: + featuren[l].remove(id) + id = id - 1 + continue + for l in range(num_features): + feature_n[l].append(featuren[l]) + a = len(num[i]) + classfication.append(num[i][a - 1]) + + return feature_n, classfication + + +def find_path_for_leaf_nodes(feature_n, classfication, num_features): + path_to_leaf = {} + for i in range(len(classfication)): + path_to_leaf["path "+str(i)] = {} + path_to_leaf["path " + str(i)]["leaf"] = classfication[i] + for j in range(num_features): + path_to_leaf["path " + str(i)]["feature "+str(j)] = feature_n[j][i] + return path_to_leaf + + + + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model, tree_index, num_features, g_table, feature_max): + textfile, feature_split = find_feature_split(model, tree_index, num_features) + + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + + feature_n, classfication = find_classification(textfile, feature_split , num_features) + path_to_leaf = find_path_for_leaf_nodes(feature_n, classfication, num_features) + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = int(np.ceil(math.log(g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))]+1,2))) or 1 + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + + Planter_config = reload_config('src/configs/Planter_config.json') + + # =================== set tree numbers in config =================== + question = 'Number of trees?' + default = 5 + Planter_config = take_CLI_input(Planter_config, 'model config', 'number of trees', question, default, + manually_input = False, numeric=True) + + # =================== set depth in config =================== + question = 'Number of depth?' + default = 4 + Planter_config = take_CLI_input(Planter_config, 'model config', 'number of depth', question, default, + manually_input = False, numeric=True) + + # =================== set max leaf node in config =================== + question = 'Number of leaf nodes?' + default = 1000 + Planter_config = take_CLI_input(Planter_config, 'model config', 'max number of leaf nodes', question, default, + manually_input = False, numeric=True) + + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + g_table = {} + for idx, estimator in enumerate(rfc.estimators_): + g_table = generate_table(estimator, idx, num_features ,g_table, feature_max) + + print("\nGenerating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + + feature_width = [] + for max_f in feature_max: + feature_width += [int(np.ceil(math.log(max_f, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = int(np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1)) or 1 + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' +str (i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree '+str(t)]: + collect_votes += [int(Exact_Table['tree '+str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree '+str(t)] = {} + for idx in Exact_Table['tree '+str(t)]: + if int(Exact_Table['tree '+str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree '+str(t)][code_table_size] = Exact_Table['tree '+str(t)][idx] + code_table_size += 1 + Exact_Table['tree '+str(t)] = copy.deepcopy(Ternary_Table['tree '+str(t)]) + + # ===================== prepare default class ========================= + + collect_class = [] + for idx in Exact_Table['decision']: + collect_class += [ Exact_Table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + table_name = 'Ternary_Table.json' + json.dump(Ternary_Table, open('Tables/'+table_name, 'w'), indent=4) + print('\nTernary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys())+1 + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree '+str(tree)].keys())+1] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature '+str(i)].keys())+1] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = [ "ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag" ] + if 'test config' not in Planter_config.keys(): + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + # dump the config file + dump_config(Planter_config, 'src/configs/Planter_config.json') + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of trees'] + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + # print('decision(vote to class) table not matched', vote_list) + switch_prediction = Planter_config['p4 config']["default label"] + # print(test_y) + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end=" ") + + + print('\nThe accuracy of the match action format of Random Forest is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') \ No newline at end of file diff --git a/src/models/RF/Type_depth_based/dedicated_p4.py b/src/models/RF/Type_depth_based/dedicated_p4.py index 7f20baa..6126ebc 100755 --- a/src/models/RF/Type_depth_based/dedicated_p4.py +++ b/src/models/RF/Type_depth_based/dedicated_p4.py @@ -1,262 +1,262 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['num_depth'] = config_file["number of depth"] - config['model'] = config_file['model'] - config["decision_table_size"] = config_file["decision table size"] - config['default label'] = config_file["default label"] - config['first_entry'] = config_file["first entry info"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n" - "#define CLASS_NOT_SET 10\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - - # headers.write("#define CLASS_NOT_SET 10\n\n") - # headers.write("struct metadata_t {\n") - for t in range(0, config['num_trees']): - headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") - - for t in range(0, config['num_trees']): - headers.write(" bit<16> node_id"+str(t+1)+";\n" - " bit<16> prevFeature"+str(t+1)+";\n" - " bit<16> isTrue"+str(t+1)+";\n" - " bit<32> th" + str(t+1) + ";\n") - headers.write(" bit<32> DstAddr;\n") - # "}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") - ingress.write("\n") - for t in range(0, config['num_trees']): - ingress.write(" meta.node_id"+str(t+1)+" = "+str(config['first_entry'][t][0])+";\n" - " meta.prevFeature"+str(t+1)+" = "+str(config['first_entry'][t][1])+";\n" - " meta.isTrue"+str(t+1)+" = "+str(config['first_entry'][t][2])+";\n") - for d in range(0, config['num_depth']): - ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n" - " "+d*" "+"if (meta.th"+str(t+1)+" & 0b10000000000000000000000000000000 == 0) meta.isTrue"+str(t+1)+" = 1;\n" - " "+d*" "+"else meta.isTrue"+str(t+1)+" = 0;\n") - ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") - ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") - ingress.write(" " + (config['num_depth']) * "} " + "\n\n") - - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - for f in range(0, config['num_features']): - ingress.write(" action CheckFeature"+str(t+1)+"_"+str(f)+"(bit<16> node_id, bit<32> threshold) {\n" - " meta.th"+str(t+1)+" = threshold - meta.feature"+str(f)+";\n" - " meta.prevFeature"+str(t+1)+" = "+str(f)+";\n" - " meta.node_id"+str(t+1)+" = node_id;\n" - " }\n\n") - - for t in range(0, config['num_trees']): - ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" - " meta.tree_" + str(t+1) + "_vote = class;\n" - " meta.node_id" + str(t+1) + " = node_id; // just for debugging otherwise not needed\n" - " }\n") - - - for t in range(0, config['num_trees']): - for d in range(0, config['num_depth']+1): - ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" - " key = {\n" - " meta.node_id" + str(t+1) + ": exact;\n" - " meta.prevFeature" + str(t+1) + ": exact;\n" - " meta.isTrue" + str(t+1) + ": exact;\n" - " }\n" - " actions = {\n" - " NoAction;\n") - for f in range(0, config['num_features']): - ingress.write(" CheckFeature"+str(t+1)+"_"+str(f)+";\n") - ingress.write(" SetClass"+str(t+1)+";\n" - " }\n" - " size = 1024;\n" - " }\n\n") - - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_command(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Table = json.load(open('Tables/Exact_Table.json', 'r')) - - fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' - - with open(fname_current, 'a') as file: - for idx in Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") - file.write("=> " + str(Table['decision'][idx]['class']) + "\n") - - with open(fname, 'w') as command: - command.write('') - current_file = open(fname_current, 'r') - for line in current_file: - new_file = open(fname, 'a') # Use append mode here - new_file.write(line) - current_file.close() - new_file.close() - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - table = json.load(open('./Tables/Exact_Table.json', 'r')) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - - # Load decision tables - tload.write("print('load table for each nodes')\n") - for idx in table['node table']: - if table['node table'][idx][0] == "CheckFeature": - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][7] - else: - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] - - tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ - '('+key_and_values+')\n') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['num_depth'] = config_file["number of depth"] + config['model'] = config_file['model'] + config["decision_table_size"] = config_file["decision table size"] + config['default label'] = config_file["default label"] + config['first_entry'] = config_file["first entry info"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n" + "#define CLASS_NOT_SET 10\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + + # headers.write("#define CLASS_NOT_SET 10\n\n") + # headers.write("struct metadata_t {\n") + for t in range(0, config['num_trees']): + headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") + + for t in range(0, config['num_trees']): + headers.write(" bit<16> node_id"+str(t+1)+";\n" + " bit<16> prevFeature"+str(t+1)+";\n" + " bit<16> isTrue"+str(t+1)+";\n" + " bit<32> th" + str(t+1) + ";\n") + headers.write(" bit<32> DstAddr;\n") + # "}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") + ingress.write("\n") + for t in range(0, config['num_trees']): + ingress.write(" meta.node_id"+str(t+1)+" = "+str(config['first_entry'][t][0])+";\n" + " meta.prevFeature"+str(t+1)+" = "+str(config['first_entry'][t][1])+";\n" + " meta.isTrue"+str(t+1)+" = "+str(config['first_entry'][t][2])+";\n") + for d in range(0, config['num_depth']): + ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n" + " "+d*" "+"if (meta.th"+str(t+1)+" & 0b10000000000000000000000000000000 == 0) meta.isTrue"+str(t+1)+" = 1;\n" + " "+d*" "+"else meta.isTrue"+str(t+1)+" = 0;\n") + ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") + ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") + ingress.write(" " + (config['num_depth']) * "} " + "\n\n") + + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + for f in range(0, config['num_features']): + ingress.write(" action CheckFeature"+str(t+1)+"_"+str(f)+"(bit<16> node_id, bit<32> threshold) {\n" + " meta.th"+str(t+1)+" = threshold - meta.feature"+str(f)+";\n" + " meta.prevFeature"+str(t+1)+" = "+str(f)+";\n" + " meta.node_id"+str(t+1)+" = node_id;\n" + " }\n\n") + + for t in range(0, config['num_trees']): + ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" + " meta.tree_" + str(t+1) + "_vote = class;\n" + " meta.node_id" + str(t+1) + " = node_id; // just for debugging otherwise not needed\n" + " }\n") + + + for t in range(0, config['num_trees']): + for d in range(0, config['num_depth']+1): + ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" + " key = {\n" + " meta.node_id" + str(t+1) + ": exact;\n" + " meta.prevFeature" + str(t+1) + ": exact;\n" + " meta.isTrue" + str(t+1) + ": exact;\n" + " }\n" + " actions = {\n" + " NoAction;\n") + for f in range(0, config['num_features']): + ingress.write(" CheckFeature"+str(t+1)+"_"+str(f)+";\n") + ingress.write(" SetClass"+str(t+1)+";\n" + " }\n" + " size = 1024;\n" + " }\n\n") + + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_command(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Table = json.load(open('Tables/Exact_Table.json', 'r')) + + fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' + + with open(fname_current, 'a') as file: + for idx in Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") + file.write("=> " + str(Table['decision'][idx]['class']) + "\n") + + with open(fname, 'w') as command: + command.write('') + current_file = open(fname_current, 'r') + for line in current_file: + new_file = open(fname, 'a') # Use append mode here + new_file.write(line) + current_file.close() + new_file.close() + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + table = json.load(open('./Tables/Exact_Table.json', 'r')) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + + # Load decision tables + tload.write("print('load table for each nodes')\n") + for idx in table['node table']: + if table['node table'][idx][0] == "CheckFeature": + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][7] + else: + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + + tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ + '('+key_and_values+')\n') diff --git a/src/models/RF/Type_depth_based/readme.md b/src/models/RF/Type_depth_based/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_depth_based/readme.md +++ b/src/models/RF/Type_depth_based/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_depth_based/table_generator.py b/src/models/RF/Type_depth_based/table_generator.py index d650f94..0d79aa5 100755 --- a/src/models/RF/Type_depth_based/table_generator.py +++ b/src/models/RF/Type_depth_based/table_generator.py @@ -1,379 +1,379 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh -# If you want to use this type of model, -# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os - -from sklearn import tree -from sklearn.tree import export_text -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier - -# i_tree = 0 -# -# global_id = 0 - - -def export_p4(decision_tree, fname): - tree_ = decision_tree.tree_ - class_names = decision_tree.classes_ - right_child_fmt = "{} {} <= {}\n" - left_child_fmt = "{} {} > {}\n" - truncation_fmt = "{} {}\n" - feature_names_ = ["{}".format(i) for i in tree_.feature] - export_text.report = "" - max_depth = 10 - spacing = 3 - decimals = 2 - show_weights = False - - if isinstance(decision_tree, DecisionTreeClassifier): - value_fmt = "{}{} weights: {}\n" - if not show_weights: - value_fmt = "{}{}{}\n" - else: - value_fmt = "{}{} value: {}\n" - - def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): - global global_id - global i_tree - global Exact_Table - - current_id = global_id - - val = '' - is_classification = isinstance(decision_tree, - DecisionTreeClassifier) - if show_weights or not is_classification: - val = ["{1:.{0}f}, ".format(decimals, v) for v in value] - val = '[' + ''.join(val)[:-2] + ']' - if is_classification: - val += ' class: ' + str(class_name) - export_text.report += value_fmt.format(indent, '', val) - # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, - # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ - " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ - str(int(float(class_name))) +"\n") - - Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), - "level_" + str(i_tree) + "_" + str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), - str(int(float(class_name)))] - Exact_Table['node table counter'] += 1 - - - - - - def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): - indent = ("|" + (" " * spacing)) * depth - indent = indent[:-spacing] + "-" * spacing - global global_id - global i_tree - global Exact_Table - - global_id = global_id + 1 - current_id = global_id - - value = None - if tree_.n_outputs == 1: - value = tree_.value[node][0] - else: - value = tree_.value[node].T[0] - class_name = np.argmax(value) - - if (tree_.n_classes[0] != 1 and - tree_.n_outputs == 1): - class_name = class_names[class_name] - - if depth <= max_depth + 1: - info_fmt = "" - info_fmt_left = info_fmt - info_fmt_right = info_fmt - - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_names_[node] - threshold = tree_.threshold[node] - threshold = "{1:.{0}f}".format(decimals, threshold) - export_text.report += right_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_left - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature" - +str(i_tree)+"_"+ str(name)+ " "+ str(previous_id) + " " + str(prevfeature) + " "+ - str(result) + " => " + str(current_id) + " " + str(int(float(threshold)))+"\n") - global first_entry - global entry_info - global Exact_Table - - Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature"+str(i_tree)+"_"+ str(name), - "level_"+str(i_tree)+ "_"+str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), str(name) , - str(int(float(threshold)))] - Exact_Table['node table counter'] += 1 - - if first_entry: - first_entry = False - entry_info += [[previous_id, prevfeature, result]] - - print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) - - export_text.report += left_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_right - # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) - - print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) - else: # leaf - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - subtree_depth = _compute_depth(tree_, node) - if subtree_depth == 1: - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - trunc_report = 'truncated branch of depth %d' % subtree_depth - export_text.report += truncation_fmt.format(indent, - trunc_report) - - print_tree_recurse(0, 1, 0, 1, global_id, fname) - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 3) ') or '3') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 2) ') or '2') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 10000) ') or '10000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - - # Random Forest - - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - # exit() - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' - # refresh the command (Table) file - with open(fname, 'w') as command: - command.write('') - - global global_id - global i_tree - global first_entry - global entry_info - global Exact_Table - - i_tree = 0 - global_id = 0 - entry_info = [] - Exact_Table = {} - Exact_Table['node table'] = {} - Exact_Table['node table counter'] = 0 - - for idx, estimator in enumerate(rfc.estimators_): - with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: - f.write('') - with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: - get_lineage(estimator, feature_names, f) - first_entry = True - i_tree = i_tree + 1 - export_p4(estimator, fname) - # print(entry_info) - - g_table = {} - print("Generating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - g_table['decision'] = g_table['votes to class'] - - collect_class = [] - for idx in g_table['decision']: - collect_class += [g_table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Exact_Table['decision'] = {} - for idx in g_table['decision']: - if g_table['decision'][idx]['class'] != default_class: - Exact_Table['decision'][code_table_size] = g_table['decision'][idx] - code_table_size += 1 - - - - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Depth_Based_Table.txt and Exact_Table.json is generated') - - - - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']["number of depth"] = num_depth - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["first entry info"] = entry_info - Planter_config['p4 config']["default label"] = default_class - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - print('The python simulation test does not support this model, please do the following emulation test on the software switch.') - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh +# If you want to use this type of model, +# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os + +from sklearn import tree +from sklearn.tree import export_text +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier + +# i_tree = 0 +# +# global_id = 0 + + +def export_p4(decision_tree, fname): + tree_ = decision_tree.tree_ + class_names = decision_tree.classes_ + right_child_fmt = "{} {} <= {}\n" + left_child_fmt = "{} {} > {}\n" + truncation_fmt = "{} {}\n" + feature_names_ = ["{}".format(i) for i in tree_.feature] + export_text.report = "" + max_depth = 10 + spacing = 3 + decimals = 2 + show_weights = False + + if isinstance(decision_tree, DecisionTreeClassifier): + value_fmt = "{}{} weights: {}\n" + if not show_weights: + value_fmt = "{}{}{}\n" + else: + value_fmt = "{}{} value: {}\n" + + def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): + global global_id + global i_tree + global Exact_Table + + current_id = global_id + + val = '' + is_classification = isinstance(decision_tree, + DecisionTreeClassifier) + if show_weights or not is_classification: + val = ["{1:.{0}f}, ".format(decimals, v) for v in value] + val = '[' + ''.join(val)[:-2] + ']' + if is_classification: + val += ' class: ' + str(class_name) + export_text.report += value_fmt.format(indent, '', val) + # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, + # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ + " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ + str(int(float(class_name))) +"\n") + + Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), + "level_" + str(i_tree) + "_" + str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), + str(int(float(class_name)))] + Exact_Table['node table counter'] += 1 + + + + + + def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): + indent = ("|" + (" " * spacing)) * depth + indent = indent[:-spacing] + "-" * spacing + global global_id + global i_tree + global Exact_Table + + global_id = global_id + 1 + current_id = global_id + + value = None + if tree_.n_outputs == 1: + value = tree_.value[node][0] + else: + value = tree_.value[node].T[0] + class_name = np.argmax(value) + + if (tree_.n_classes[0] != 1 and + tree_.n_outputs == 1): + class_name = class_names[class_name] + + if depth <= max_depth + 1: + info_fmt = "" + info_fmt_left = info_fmt + info_fmt_right = info_fmt + + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_names_[node] + threshold = tree_.threshold[node] + threshold = "{1:.{0}f}".format(decimals, threshold) + export_text.report += right_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_left + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature" + +str(i_tree)+"_"+ str(name)+ " "+ str(previous_id) + " " + str(prevfeature) + " "+ + str(result) + " => " + str(current_id) + " " + str(int(float(threshold)))+"\n") + global first_entry + global entry_info + global Exact_Table + + Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature"+str(i_tree)+"_"+ str(name), + "level_"+str(i_tree)+ "_"+str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), str(name) , + str(int(float(threshold)))] + Exact_Table['node table counter'] += 1 + + if first_entry: + first_entry = False + entry_info += [[previous_id, prevfeature, result]] + + print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) + + export_text.report += left_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_right + # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) + + print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) + else: # leaf + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + subtree_depth = _compute_depth(tree_, node) + if subtree_depth == 1: + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + trunc_report = 'truncated branch of depth %d' % subtree_depth + export_text.report += truncation_fmt.format(indent, + trunc_report) + + print_tree_recurse(0, 1, 0, 1, global_id, fname) + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 3) ') or '3') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 2) ') or '2') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 10000) ') or '10000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + + # Random Forest + + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + # exit() + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' + # refresh the command (Table) file + with open(fname, 'w') as command: + command.write('') + + global global_id + global i_tree + global first_entry + global entry_info + global Exact_Table + + i_tree = 0 + global_id = 0 + entry_info = [] + Exact_Table = {} + Exact_Table['node table'] = {} + Exact_Table['node table counter'] = 0 + + for idx, estimator in enumerate(rfc.estimators_): + with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: + f.write('') + with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: + get_lineage(estimator, feature_names, f) + first_entry = True + i_tree = i_tree + 1 + export_p4(estimator, fname) + # print(entry_info) + + g_table = {} + print("Generating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + g_table['decision'] = g_table['votes to class'] + + collect_class = [] + for idx in g_table['decision']: + collect_class += [g_table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Exact_Table['decision'] = {} + for idx in g_table['decision']: + if g_table['decision'][idx]['class'] != default_class: + Exact_Table['decision'][code_table_size] = g_table['decision'][idx] + code_table_size += 1 + + + + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Depth_Based_Table.txt and Exact_Table.json is generated') + + + + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']["number of depth"] = num_depth + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["first entry info"] = entry_info + Planter_config['p4 config']["default label"] = default_class + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + print('The python simulation test does not support this model, please do the following emulation test on the software switch.') + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/Type_depth_based_bmv2_only/dedicated_p4.py b/src/models/RF/Type_depth_based_bmv2_only/dedicated_p4.py index 5a31329..b4c7bcc 100755 --- a/src/models/RF/Type_depth_based_bmv2_only/dedicated_p4.py +++ b/src/models/RF/Type_depth_based_bmv2_only/dedicated_p4.py @@ -1,273 +1,273 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['num_depth'] = config_file["number of depth"] - config['model'] = config_file['model'] - config["decision_table_size"] = config_file["decision table size"] - config['default label'] = config_file["default label"] - config['first_entry'] = config_file["first entry info"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n" - "#define CLASS_NOT_SET 10\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - - # headers.write("#define CLASS_NOT_SET 10\n\n") - # headers.write("struct metadata_t {\n") - for t in range(0, config['num_trees']): - headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") - headers.write(" bit<16> node_id;\n" - " bit<16> prevFeature;\n" - " bit<16> isTrue;\n" - " bit<32> DstAddr;\n") - # "}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for t in range(0, config['num_trees']): - ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") - ingress.write("\n") - for t in range(0, config['num_trees']): - ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" - " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" - " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") - for d in range(0, config['num_depth']): - ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") - ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") - ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") - ingress.write(" " + (config['num_depth']) * "} " + "\n\n") - - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - - ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" - " bit<32> feature = 0;\n" - # " bit<32> th = threshold ;\n" - " bit<16> f = f_inout ;\n") - for f in range(0, config['num_features']): - ingress.write(" if (f == "+str(f)+") {\n" - " feature = meta.feature"+str(f)+";\n" - " }\n") - ingress.write(" bit<32> th = threshold - feature;\n" - # " if (feature <= th){\n" # if (feature <= th){ - " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ - " meta.isTrue = 1;\n" - " }else{\n" - " meta.isTrue = 0;\n" - " }\n" - " meta.prevFeature = f;\n" - " meta.node_id = node_id;\n") - - ingress.write(" }\n\n") - - for t in range(0, config['num_trees']): - ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" - " meta.tree_" + str(t+1) + "_vote = class;\n" - " meta.node_id = node_id; // just for debugging otherwise not needed\n" - " }\n") - - - for t in range(0, config['num_trees']): - for d in range(0, config['num_depth']+1): - ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" - " key = {\n" - " meta.node_id: exact;\n" - " meta.prevFeature: exact;\n" - " meta.isTrue: exact;\n" - " }\n" - " actions = {\n" - " NoAction;\n" - " CheckFeature;\n" - " SetClass"+str(t+1)+";\n" - " }\n" - " size = 1024;\n" - " }\n\n") - - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_command(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Table = json.load(open('Tables/Exact_Table.json', 'r')) - - fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' - total_entries = 0 - with open(fname_current, 'a') as file: - for idx in Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") - file.write("=> " + str(Table['decision'][idx]['class']) + "\n") - total_entries += 1 - - with open(fname, 'w') as command: - command.write('') - current_file = open(fname_current, 'r') - - for line in current_file: - new_file = open(fname, 'a') # Use append mode here - new_file.write(line) - total_entries += 1 - print('Actual exact table entries:', total_entries, '...', end='') - current_file.close() - new_file.close() - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_command(commend_file, Planter_config) - - table = json.load(open('./Tables/Exact_Table.json', 'r')) - - config['debug_load_table'] = False - - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") - - # Load decision tables - tload.write("print('load table for each nodes')\n") - for idx in table['node table']: - if table['node table'][idx][0] == "CheckFeature": - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] + ', ' + table['node table'][idx][7] - else: - key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ - table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ - table['node table'][idx][6] - - tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ - '('+key_and_values+')\n') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['num_depth'] = config_file["number of depth"] + config['model'] = config_file['model'] + config["decision_table_size"] = config_file["decision table size"] + config['default label'] = config_file["default label"] + config['first_entry'] = config_file["first entry info"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n" + "#define CLASS_NOT_SET 10\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + + # headers.write("#define CLASS_NOT_SET 10\n\n") + # headers.write("struct metadata_t {\n") + for t in range(0, config['num_trees']): + headers.write(" bit<16> tree_" + str(t+1) + "_vote;\n") + headers.write(" bit<16> node_id;\n" + " bit<16> prevFeature;\n" + " bit<16> isTrue;\n" + " bit<32> DstAddr;\n") + # "}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for t in range(0, config['num_trees']): + ingress.write(" meta.tree_" + str(t+1) + "_vote = CLASS_NOT_SET;\n") + ingress.write("\n") + for t in range(0, config['num_trees']): + ingress.write(" meta.node_id = "+str(config['first_entry'][t][0])+";\n" + " meta.prevFeature = "+str(config['first_entry'][t][1])+";\n" + " meta.isTrue = "+str(config['first_entry'][t][2])+";\n") + for d in range(0, config['num_depth']): + ingress.write(" "+d*" "+"level_"+str(t+1)+"_"+str(d+1)+".apply();\n") + ingress.write(" "+d*" "+"if (meta.tree_" + str(t+1) + "_vote == CLASS_NOT_SET) {\n") + ingress.write(" " + config['num_depth'] * " " + "level_"+str(t+1)+"_"+str(config['num_depth']+1)+".apply();\n") + ingress.write(" " + (config['num_depth']) * "} " + "\n\n") + + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + + ingress.write(" action CheckFeature(bit<16> node_id, bit<16> f_inout, bit<32> threshold) {\n" + " bit<32> feature = 0;\n" + # " bit<32> th = threshold ;\n" + " bit<16> f = f_inout ;\n") + for f in range(0, config['num_features']): + ingress.write(" if (f == "+str(f)+") {\n" + " feature = meta.feature"+str(f)+";\n" + " }\n") + ingress.write(" bit<32> th = threshold - feature;\n" + # " if (feature <= th){\n" # if (feature <= th){ + " if (th & 0b1"+31*"0"+"==0){\n" # if (feature <= th){ + " meta.isTrue = 1;\n" + " }else{\n" + " meta.isTrue = 0;\n" + " }\n" + " meta.prevFeature = f;\n" + " meta.node_id = node_id;\n") + + ingress.write(" }\n\n") + + for t in range(0, config['num_trees']): + ingress.write(" action SetClass" + str(t+1) + "(bit <16> node_id, bit <16> class ) {\n" + " meta.tree_" + str(t+1) + "_vote = class;\n" + " meta.node_id = node_id; // just for debugging otherwise not needed\n" + " }\n") + + + for t in range(0, config['num_trees']): + for d in range(0, config['num_depth']+1): + ingress.write(" table level_"+str(t+1)+"_"+str(d+1)+"{\n" + " key = {\n" + " meta.node_id: exact;\n" + " meta.prevFeature: exact;\n" + " meta.isTrue: exact;\n" + " }\n" + " actions = {\n" + " NoAction;\n" + " CheckFeature;\n" + " SetClass"+str(t+1)+";\n" + " }\n" + " size = 1024;\n" + " }\n\n") + + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t+1) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_command(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Table = json.load(open('Tables/Exact_Table.json', 'r')) + + fname_current = config['directory config']['work'] + '/Tables/Depth_Based_Table.txt' + total_entries = 0 + with open(fname_current, 'a') as file: + for idx in Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Table['decision'][idx]['t' + str(t) + ' vote']) + " ") + file.write("=> " + str(Table['decision'][idx]['class']) + "\n") + total_entries += 1 + + with open(fname, 'w') as command: + command.write('') + current_file = open(fname_current, 'r') + + for line in current_file: + new_file = open(fname, 'a') # Use append mode here + new_file.write(line) + total_entries += 1 + print('Actual exact table entries:', total_entries, '...', end='') + current_file.close() + new_file.close() + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_command(commend_file, Planter_config) + + table = json.load(open('./Tables/Exact_Table.json', 'r')) + + config['debug_load_table'] = False + + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") + + # Load decision tables + tload.write("print('load table for each nodes')\n") + for idx in table['node table']: + if table['node table'][idx][0] == "CheckFeature": + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + ', ' + table['node table'][idx][7] + else: + key_and_values = table['node table'][idx][2] + ', ' + table['node table'][idx][3] + ', ' + \ + table['node table'][idx][4] + ', ' + table['node table'][idx][5] + ', ' + \ + table['node table'][idx][6] + + tload.write("Ingress."+table['node table'][idx][1]+".add_with_"+table['node table'][idx][0]+ + '('+key_and_values+')\n') diff --git a/src/models/RF/Type_depth_based_bmv2_only/readme.md b/src/models/RF/Type_depth_based_bmv2_only/readme.md index 20b917d..fed42c8 100644 --- a/src/models/RF/Type_depth_based_bmv2_only/readme.md +++ b/src/models/RF/Type_depth_based_bmv2_only/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for RF. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/RF/Type_depth_based_bmv2_only/table_generator.py b/src/models/RF/Type_depth_based_bmv2_only/table_generator.py index 4ed138a..9f3a545 100755 --- a/src/models/RF/Type_depth_based_bmv2_only/table_generator.py +++ b/src/models/RF/Type_depth_based_bmv2_only/table_generator.py @@ -1,396 +1,396 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh -# If you want to use this type of model, -# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - - -from sklearn.preprocessing import LabelEncoder -from sklearn.tree import _tree -from sklearn.ensemble import RandomForestClassifier -# from create_files import * -import math -import re -import json -from sklearn.metrics import * -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import copy -import os -import time -from sklearn import tree -from sklearn.tree import export_text -from sklearn.tree import _tree -from sklearn.tree import DecisionTreeClassifier - -# i_tree = 0 -# -# global_id = 0 - - -def export_p4(decision_tree, fname): - tree_ = decision_tree.tree_ - class_names = decision_tree.classes_ - right_child_fmt = "{} {} <= {}\n" - left_child_fmt = "{} {} > {}\n" - truncation_fmt = "{} {}\n" - feature_names_ = ["{}".format(i) for i in tree_.feature] - export_text.report = "" - max_depth = 10 - spacing = 3 - decimals = 2 - show_weights = False - - if isinstance(decision_tree, DecisionTreeClassifier): - value_fmt = "{}{} weights: {}\n" - if not show_weights: - value_fmt = "{}{}{}\n" - else: - value_fmt = "{}{} value: {}\n" - - def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): - global global_id - global i_tree - global Exact_Table - - current_id = global_id - - val = '' - is_classification = isinstance(decision_tree, - DecisionTreeClassifier) - if show_weights or not is_classification: - val = ["{1:.{0}f}, ".format(decimals, v) for v in value] - val = '[' + ''.join(val)[:-2] + ']' - if is_classification: - val += ' class: ' + str(class_name) - export_text.report += value_fmt.format(indent, '', val) - # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, - # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ - " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ - str(int(float(class_name))) +"\n") - - Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), - "level_" + str(i_tree) + "_" + str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), - str(int(float(class_name)))] - Exact_Table['node table counter'] += 1 - - - - - - def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): - indent = ("|" + (" " * spacing)) * depth - indent = indent[:-spacing] + "-" * spacing - global global_id - global i_tree - global Exact_Table - - global_id = global_id + 1 - current_id = global_id - - value = None - if tree_.n_outputs == 1: - value = tree_.value[node][0] - else: - value = tree_.value[node].T[0] - class_name = np.argmax(value) - - if (tree_.n_classes[0] != 1 and - tree_.n_outputs == 1): - class_name = class_names[class_name] - - if depth <= max_depth + 1: - info_fmt = "" - info_fmt_left = info_fmt - info_fmt_right = info_fmt - - if tree_.feature[node] != _tree.TREE_UNDEFINED: - name = feature_names_[node] - threshold = tree_.threshold[node] - threshold = "{1:.{0}f}".format(decimals, threshold) - export_text.report += right_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_left - with open(fname, 'a') as command: - command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ - str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + - " " + str(name) + " " + str(int(float(threshold)))+"\n") - global first_entry - global entry_info - global Exact_Table - - Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), - str(previous_id), str(prevfeature), - str(result), str(current_id), str(name) , - str(int(float(threshold)))] - Exact_Table['node table counter'] += 1 - - if first_entry: - first_entry = False - entry_info += [[previous_id, prevfeature, result]] - - print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) - - export_text.report += left_child_fmt.format(indent, - name, - threshold) - export_text.report += info_fmt_right - # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) - - print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) - else: # leaf - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - subtree_depth = _compute_depth(tree_, node) - if subtree_depth == 1: - _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) - else: - trunc_report = 'truncated branch of depth %d' % subtree_depth - export_text.report += truncation_fmt.format(indent, - trunc_report) - - print_tree_recurse(0, 1, 0, 1, global_id, fname) - - - -def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for i in range(num_trees): - vote[vote_list[i]] += 1 - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(num_classes): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) - tree_num -= 1 - return g_table, num - - -def get_lineage(tree, feature_names, file): - left = tree.tree_.children_left - right = tree.tree_.children_right - threshold = tree.tree_.threshold - features = [feature_names[i] for i in tree.tree_.feature] - value = tree.tree_.value - le = '<=' - g = '>' - # get ids of child nodes - idx = np.argwhere(left == -1)[:, 0] - # traverse the tree and get the node information - def recurse(left, right, child, lineage=None): - if lineage is None: - lineage = [child] - if child in left: - parent = np.where(left == child)[0].item() - split = 'l' - else: - parent = np.where(right == child)[0].item() - split = 'r' - lineage.append((parent, split, threshold[parent], features[parent])) - if parent == 0: - lineage.reverse() - return lineage - else: - return recurse(left, right, parent, lineage) - for j, child in enumerate(idx): - clause = ' when ' - for node in recurse(left, right, child): - if len(str(node)) < 3: - continue - i = node - if not isinstance(i, tuple): - continue - if i[1] == 'l': - sign = le - else: - sign = g - clause = clause + i[3] + sign + str(i[2]) + ' and ' - # wirte the node information into text file - a = list(value[node][0]) - ind = a.index(np.max(a)) - clause = clause[:-4] + ' then ' + str(ind) - file.write(clause) - file.write(";\n") - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_depth = Planter_config['model config']['number of depth'] - num_trees = Planter_config['model config']['number of trees'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f" + str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names += ["f" + str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # Random Forest - - rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) - rfc.fit(train_X, train_y) - - sklearn_y_predict = rfc.predict(test_X) - - result = classification_report(test_y, sklearn_y_predict, digits= 4) - print('\n',result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ( "num_feature: "+str(num_features)) not in log_dict: - log_dict["num_feature: "+str(num_features)] = {} - if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} - if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: - log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result - log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print ('Classification results are downloaded to log as', log_file) - - - fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' - # refresh the command (Table) file - with open(fname, 'w') as command: - command.write('') - - global global_id - global i_tree - global first_entry - global entry_info - global Exact_Table - - i_tree = 0 - global_id = 0 - entry_info = [] - Exact_Table = {} - Exact_Table['node table'] = {} - Exact_Table['node table counter'] = 0 - - for idx, estimator in enumerate(rfc.estimators_): - with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: - f.write('') - with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: - get_lineage(estimator, feature_names, f) - first_entry = True - i_tree = i_tree + 1 - export_p4(estimator, fname) - # print(entry_info) - - g_table = {} - print("Generating vote to class table...", end="") - g_table['votes to class'] = {} - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) - print('Done') - - g_table['decision'] = g_table['votes to class'] - - collect_class = [] - for idx in g_table['decision']: - collect_class += [g_table['decision'][idx]['class']] - default_class = max(collect_class, key=collect_class.count) - - code_table_size = 0 - Exact_Table['decision'] = {} - for idx in g_table['decision']: - if g_table['decision'][idx]['class'] != default_class: - Exact_Table['decision'][code_table_size] = g_table['decision'][idx] - code_table_size += 1 - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Depth_Based_Table.txt and Exact_Table.json is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "RF" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']["number of depth"] = num_depth - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) - Planter_config['p4 config']["first entry info"] = entry_info - Planter_config['p4 config']["default label"] = default_class - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - -def test_tables(sklearn_test_y, test_X, test_y): - print('The python simulation test does not support this model, please do the following emulation test on the software switch.') - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of trees"]*Planter_config['p4 config']["number of depth"]) - - - - -if __name__ == '__main__': - print('there are many dependencies, directly run is not currently supported') +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2019 Jong-Hyouk Lee and Kamal Singh +# If you want to use this type of model, +# please cite their work 'SwitchTree: In-network Computing and Traffic Analyses with Random Forests' +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + + +from sklearn.preprocessing import LabelEncoder +from sklearn.tree import _tree +from sklearn.ensemble import RandomForestClassifier +# from create_files import * +import math +import re +import json +from sklearn.metrics import * +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import copy +import os +import time +from sklearn import tree +from sklearn.tree import export_text +from sklearn.tree import _tree +from sklearn.tree import DecisionTreeClassifier + +# i_tree = 0 +# +# global_id = 0 + + +def export_p4(decision_tree, fname): + tree_ = decision_tree.tree_ + class_names = decision_tree.classes_ + right_child_fmt = "{} {} <= {}\n" + left_child_fmt = "{} {} > {}\n" + truncation_fmt = "{} {}\n" + feature_names_ = ["{}".format(i) for i in tree_.feature] + export_text.report = "" + max_depth = 10 + spacing = 3 + decimals = 2 + show_weights = False + + if isinstance(decision_tree, DecisionTreeClassifier): + value_fmt = "{}{} weights: {}\n" + if not show_weights: + value_fmt = "{}{}{}\n" + else: + value_fmt = "{}{} value: {}\n" + + def _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname): + global global_id + global i_tree + global Exact_Table + + current_id = global_id + + val = '' + is_classification = isinstance(decision_tree, + DecisionTreeClassifier) + if show_weights or not is_classification: + val = ["{1:.{0}f}, ".format(decimals, v) for v in value] + val = '[' + ''.join(val)[:-2] + ']' + if is_classification: + val += ' class: ' + str(class_name) + export_text.report += value_fmt.format(indent, '', val) + # print("table_add MyIngress.level_", i_tree, "_", depth, " ", "MyIngress.SetClass", i_tree, " ", previous_id, + # " ", prevfeature, " ", result, " ", "=>", " ", current_id, " ", int(float(class_name)), sep="") + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+" SwitchIngress.SetClass"+str(i_tree)+ + " "+str(previous_id)+ " "+str(prevfeature)+ " "+str(result)+ " => "+str(current_id)+ " "+ + str(int(float(class_name))) +"\n") + + Exact_Table['node table'][Exact_Table['node table counter']] = ["SetClass"+str(i_tree), + "level_" + str(i_tree) + "_" + str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), + str(int(float(class_name)))] + Exact_Table['node table counter'] += 1 + + + + + + def print_tree_recurse(node, depth, prevfeature, result, previous_id, fname): + indent = ("|" + (" " * spacing)) * depth + indent = indent[:-spacing] + "-" * spacing + global global_id + global i_tree + global Exact_Table + + global_id = global_id + 1 + current_id = global_id + + value = None + if tree_.n_outputs == 1: + value = tree_.value[node][0] + else: + value = tree_.value[node].T[0] + class_name = np.argmax(value) + + if (tree_.n_classes[0] != 1 and + tree_.n_outputs == 1): + class_name = class_names[class_name] + + if depth <= max_depth + 1: + info_fmt = "" + info_fmt_left = info_fmt + info_fmt_right = info_fmt + + if tree_.feature[node] != _tree.TREE_UNDEFINED: + name = feature_names_[node] + threshold = tree_.threshold[node] + threshold = "{1:.{0}f}".format(decimals, threshold) + export_text.report += right_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_left + with open(fname, 'a') as command: + command.write("table_add SwitchIngress.level_"+str(i_tree)+ "_"+str(depth)+ " SwitchIngress.CheckFeature "+ + str(previous_id) + " " + str(prevfeature) + " "+str(result) + " => " + str(current_id) + + " " + str(name) + " " + str(int(float(threshold)))+"\n") + global first_entry + global entry_info + global Exact_Table + + Exact_Table['node table'][Exact_Table['node table counter']] = ["CheckFeature", "level_"+str(i_tree)+ "_"+str(depth), + str(previous_id), str(prevfeature), + str(result), str(current_id), str(name) , + str(int(float(threshold)))] + Exact_Table['node table counter'] += 1 + + if first_entry: + first_entry = False + entry_info += [[previous_id, prevfeature, result]] + + print_tree_recurse(tree_.children_left[node], depth + 1, name, 1, current_id, fname) + + export_text.report += left_child_fmt.format(indent, + name, + threshold) + export_text.report += info_fmt_right + # print("level", depth, "checkfeature", prevfeature, result, "=>", name, threshold) + + print_tree_recurse(tree_.children_right[node], depth + 1, name, 0, current_id, fname) + else: # leaf + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + subtree_depth = _compute_depth(tree_, node) + if subtree_depth == 1: + _add_leaf(value, class_name, indent, prevfeature, result, depth, previous_id, fname) + else: + trunc_report = 'truncated branch of depth %d' % subtree_depth + export_text.report += truncation_fmt.format(indent, + trunc_report) + + print_tree_recurse(0, 1, 0, 1, global_id, fname) + + + +def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for i in range(num_trees): + vote[vote_list[i]] += 1 + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(num_classes): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num) + tree_num -= 1 + return g_table, num + + +def get_lineage(tree, feature_names, file): + left = tree.tree_.children_left + right = tree.tree_.children_right + threshold = tree.tree_.threshold + features = [feature_names[i] for i in tree.tree_.feature] + value = tree.tree_.value + le = '<=' + g = '>' + # get ids of child nodes + idx = np.argwhere(left == -1)[:, 0] + # traverse the tree and get the node information + def recurse(left, right, child, lineage=None): + if lineage is None: + lineage = [child] + if child in left: + parent = np.where(left == child)[0].item() + split = 'l' + else: + parent = np.where(right == child)[0].item() + split = 'r' + lineage.append((parent, split, threshold[parent], features[parent])) + if parent == 0: + lineage.reverse() + return lineage + else: + return recurse(left, right, parent, lineage) + for j, child in enumerate(idx): + clause = ' when ' + for node in recurse(left, right, child): + if len(str(node)) < 3: + continue + i = node + if not isinstance(i, tuple): + continue + if i[1] == 'l': + sign = le + else: + sign = g + clause = clause + i[3] + sign + str(i[2]) + ' and ' + # wirte the node information into text file + a = list(value[node][0]) + ind = a.index(np.max(a)) + clause = clause[:-4] + ' then ' + str(ind) + file.write(clause) + file.write(";\n") + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_depth = Planter_config['model config']['number of depth'] + num_trees = Planter_config['model config']['number of trees'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f" + str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names += ["f" + str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # Random Forest + + rfc = RandomForestClassifier(n_estimators=num_trees, max_depth=num_depth, max_leaf_nodes=max_leaf_nodes) + rfc.fit(train_X, train_y) + + sklearn_y_predict = rfc.predict(test_X) + + result = classification_report(test_y, sklearn_y_predict, digits= 4) + print('\n',result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ( "num_feature: "+str(num_features)) not in log_dict: + log_dict["num_feature: "+str(num_features)] = {} + if ( "num_tree: "+str(num_trees)) not in log_dict["num_feature: "+str(num_features)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)] = {} + if ( "num_depth: "+str(num_depth)) not in log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]: + log_dict["num_feature: "+str(num_features)]["num_tree: "+str(num_trees)]["num_depth: "+ str(num_depth)]= {} + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["classification_report"] = result + log_dict["num_feature: " + str(num_features)][ "num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)]["max number of leaf nodes"] =max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print ('Classification results are downloaded to log as', log_file) + + + fname = Planter_config['directory config']['work']+'/Tables/Depth_Based_Table.txt' + # refresh the command (Table) file + with open(fname, 'w') as command: + command.write('') + + global global_id + global i_tree + global first_entry + global entry_info + global Exact_Table + + i_tree = 0 + global_id = 0 + entry_info = [] + Exact_Table = {} + Exact_Table['node table'] = {} + Exact_Table['node table counter'] = 0 + + for idx, estimator in enumerate(rfc.estimators_): + with open('./src/temp/tree' + str(idx) + '.txt', 'w') as f: + f.write('') + with open('./src/temp/tree' + str(idx) + '.txt', 'a') as f: + get_lineage(estimator, feature_names, f) + first_entry = True + i_tree = i_tree + 1 + export_p4(estimator, fname) + # print(entry_info) + + g_table = {} + print("Generating vote to class table...", end="") + g_table['votes to class'] = {} + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0) + print('Done') + + g_table['decision'] = g_table['votes to class'] + + collect_class = [] + for idx in g_table['decision']: + collect_class += [g_table['decision'][idx]['class']] + default_class = max(collect_class, key=collect_class.count) + + code_table_size = 0 + Exact_Table['decision'] = {} + for idx in g_table['decision']: + if g_table['decision'][idx]['class'] != default_class: + Exact_Table['decision'][code_table_size] = g_table['decision'][idx] + code_table_size += 1 + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Depth_Based_Table.txt and Exact_Table.json is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "RF" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']["number of depth"] = num_depth + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']["decision table size"] = len(Exact_Table['decision'].keys()) + Planter_config['p4 config']["first entry info"] = entry_info + Planter_config['p4 config']["default label"] = default_class + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config , open(Planter_config['directory config']['work']+'/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work']+'/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + +def test_tables(sklearn_test_y, test_X, test_y): + print('The python simulation test does not support this model, please do the following emulation test on the software switch.') + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',Planter_config['p4 config']["decision table size"]+1024*Planter_config['p4 config']["number of trees"]*Planter_config['p4 config']["number of depth"]) + + + + +if __name__ == '__main__': + print('there are many dependencies, directly run is not currently supported') diff --git a/src/models/RF/readme.md b/src/models/RF/readme.md index 6d7f692..a38ceee 100644 --- a/src/models/RF/readme.md +++ b/src/models/RF/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported RF. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported RF. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/SVM/Type_1/dedicated_p4.py b/src/models/SVM/Type_1/dedicated_p4.py index 92afdea..ee6e563 100755 --- a/src/models/SVM/Type_1/dedicated_p4.py +++ b/src/models/SVM/Type_1/dedicated_p4.py @@ -1,351 +1,351 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['num_hps'] = Planter_config['p4 config']["number of hps"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['thresh_and_bias'] = Planter_config['p4 config']['thresh and bias'] - config['hp_info'] = Planter_config['p4 config']['hp_info'] - - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for f in range(0, config['num_features']): - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) +"_hp" + str(h)+ ";\n") - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> bias_hp" + str(h) + ";\n") - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> threshold_hp" + str(h) + ";\n") - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> result_hp" + str(h) + ";\n") - for c in range(0, config['num_classes']): - headers.write(" bit<4> vote_c" + str(c) + ";\n") - - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for h in range(0, config['num_hps']): - if h==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "hp" + str(h)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"hp"+str(h)) - ingress.write("){\n") - for h in range(0, config['num_hps']): - ingress.write(" meta.result_hp" + str(h)+" = meta.result_hp" + str(h)+" + f"+str(f)+"hp"+str(h) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { meta.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_thresh_and_bias(") - for h in range(0, config['num_hps']): - if h==0: - ingress.write("bit<" + str(config['num_bits']) + "> thresh_hp" + str(h)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> thresh_hp"+str(h)) - for h in range(0, config['num_hps']): - ingress.write(", bit<" + str(config['num_bits']) + "> bia_hp" + str(h)) - ingress.write("){\n") - for h in range(0, config['num_hps']): - ingress.write(" meta.threshold_hp" + str(h)+ " = thresh_hp"+str(h) +";\n") - for h in range(0, config['num_hps']): - ingress.write(" meta.result_hp" + str(h)+ " = bia_hp"+str(h) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table thresh_and_bias {\n" - " key = {meta.flag:exact;}\n" - " actions={read_thresh_and_bias; NoAction;}\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - - ingress.write(" table decision {\n" - " key = {\n") - for c in range(config['num_classes']): - ingress.write(" meta.vote_c"+str(c)+": exact;\n") - ingress.write(" }\n" - " actions={read_lable; NoAction;}\n" - " default_action = NoAction;\n" - " size = "+str(config['num_classes']**config['num_hps'])+";\n" - " }\n\n") - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for c in range(0, config['num_classes']): - ingress.write(" meta.vote_c"+str(c)+"=0;\n") - - ingress.write(" thresh_and_bias.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - for h in range(0, config['num_hps']): - ingress.write(" meta.result_hp" + str(h) + " = meta.threshold_hp"+str(h)+" - meta.result_hp"+str(h)+ ";\n") - for h in range(0, config['num_hps']): - ingress.write(" if(meta.result_hp" + str(h) + " & 0b1"+"0"*(config['num_bits']-1)+"==0){\n") - if config['num_classes']!=2: - ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][1]) + " = meta.vote_c" + str(config['hp_info'][str(h)][1]) + "+ 1;\n") - else: ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][0]) + " = meta.vote_c" + str(config['hp_info'][str(h)][0]) + "+ 1;\n") - ingress.write(" }else{") - if config['num_classes']!=2: - ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][0]) +" = meta.vote_c"+str(config['hp_info'][str(h)][0]) +"+ 1;}\n\n") - else:ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][1]) +" = meta.vote_c"+str(config['hp_info'][str(h)][1]) +"+ 1;}\n\n") - ingress.write(" decision.apply();\n") - - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_hps = np.int(num_classes * (num_classes - 1) / 2) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['f' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["meta.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - for hp in range(num_hps): - Entry["action_params"]["f"+str(f)+"hp"+str(hp)] = Exact_Table['f' + str(f)][idx]["hp "+str(hp)] - Table_entries += [Entry] - - - for idx in Exact_Table['decision']: - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for c in range(num_classes): - Entry["match"]["meta.vote_c"+str(c)] = np.int(Exact_Table['decision'][idx]['c'+str(c)+' vote']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = np.int(Exact_Table['decision'][idx]["class"]) - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.thresh_and_bias" - Entry["match"] = {} - Entry["match"]["meta.flag"] = 1 - Entry["action_name"] = "SwitchIngress.read_thresh_and_bias" - Entry["action_params"] = {} - for hp in range(num_hps): - Entry["action_params"]["thresh_hp"+str(hp)] = np.int(Exact_Table['threshold hp'+str(hp)]) - Entry["action_params"]["bia_hp" + str(hp)] = np.int(Exact_Table['bias hp' + str(hp)]) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_hps = np.int(num_classes * (num_classes - 1) / 2) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Exact_Table['f' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => ") - for hp in range(num_hps): - file.write(str(Exact_Table['f' + str(f)][idx][ "hp " + str(hp)])+" ") - file.write("\n") - file.write("\n") - - - for idx in Exact_Table['decision']: - key_value = int(idx) - file.write("table_add SwitchIngress.decision read_lable ") - for c in range(num_classes): - file.write(str(int(Exact_Table['decision'][idx]['c' + str(c) + ' vote']))+" ") - file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") - - - file.write("table_add SwitchIngress.thresh_and_bias read_thresh_and_bias 1 => ") - for hp in range(num_hps): - file.write(str(np.int(Exact_Table['threshold hp' + str(hp)]))+" ") - for hp in range(num_hps): - file.write(str(np.int(np.int(Exact_Table['bias hp' + str(hp)]))) + " ") - file.write("\n") - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['f" + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['f" + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for h in range(0, config['num_hps']): - if h==0: - tload.write("table['f" + str(f) + "'][key]['hp " + str(h) + "']") - else: - tload.write(", table['f"+str(f)+"'][key]['hp "+str(h)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load thresh_and_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.thresh_and_bias.add_with_read_thresh_and_bias(" - "1, ") - for h in range(0, config['num_hps']): - if h == 0: - tload.write("table['threshold hp" + str(h) + "']") - else: - tload.write(", table['threshold hp" + str(h) + "']") - for h in range(0, config['num_hps']): - tload.write(", table['bias hp" + str(h) + "']") - tload.write(")\n\n") - - tload.write("print('load decision table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for c in range(config['num_classes']): - tload.write("table['decision'][key]['c" + str(c) + " vote'], ") - tload.write(" int(table['decision'][key]['class']))\n") - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['num_hps'] = Planter_config['p4 config']["number of hps"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['thresh_and_bias'] = Planter_config['p4 config']['thresh and bias'] + config['hp_info'] = Planter_config['p4 config']['hp_info'] + + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for f in range(0, config['num_features']): + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) +"_hp" + str(h)+ ";\n") + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> bias_hp" + str(h) + ";\n") + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> threshold_hp" + str(h) + ";\n") + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> result_hp" + str(h) + ";\n") + for c in range(0, config['num_classes']): + headers.write(" bit<4> vote_c" + str(c) + ";\n") + + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for h in range(0, config['num_hps']): + if h==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "hp" + str(h)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"hp"+str(h)) + ingress.write("){\n") + for h in range(0, config['num_hps']): + ingress.write(" meta.result_hp" + str(h)+" = meta.result_hp" + str(h)+" + f"+str(f)+"hp"+str(h) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { meta.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_thresh_and_bias(") + for h in range(0, config['num_hps']): + if h==0: + ingress.write("bit<" + str(config['num_bits']) + "> thresh_hp" + str(h)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> thresh_hp"+str(h)) + for h in range(0, config['num_hps']): + ingress.write(", bit<" + str(config['num_bits']) + "> bia_hp" + str(h)) + ingress.write("){\n") + for h in range(0, config['num_hps']): + ingress.write(" meta.threshold_hp" + str(h)+ " = thresh_hp"+str(h) +";\n") + for h in range(0, config['num_hps']): + ingress.write(" meta.result_hp" + str(h)+ " = bia_hp"+str(h) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table thresh_and_bias {\n" + " key = {meta.flag:exact;}\n" + " actions={read_thresh_and_bias; NoAction;}\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + + ingress.write(" table decision {\n" + " key = {\n") + for c in range(config['num_classes']): + ingress.write(" meta.vote_c"+str(c)+": exact;\n") + ingress.write(" }\n" + " actions={read_lable; NoAction;}\n" + " default_action = NoAction;\n" + " size = "+str(config['num_classes']**config['num_hps'])+";\n" + " }\n\n") + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for c in range(0, config['num_classes']): + ingress.write(" meta.vote_c"+str(c)+"=0;\n") + + ingress.write(" thresh_and_bias.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + for h in range(0, config['num_hps']): + ingress.write(" meta.result_hp" + str(h) + " = meta.threshold_hp"+str(h)+" - meta.result_hp"+str(h)+ ";\n") + for h in range(0, config['num_hps']): + ingress.write(" if(meta.result_hp" + str(h) + " & 0b1"+"0"*(config['num_bits']-1)+"==0){\n") + if config['num_classes']!=2: + ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][1]) + " = meta.vote_c" + str(config['hp_info'][str(h)][1]) + "+ 1;\n") + else: ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][0]) + " = meta.vote_c" + str(config['hp_info'][str(h)][0]) + "+ 1;\n") + ingress.write(" }else{") + if config['num_classes']!=2: + ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][0]) +" = meta.vote_c"+str(config['hp_info'][str(h)][0]) +"+ 1;}\n\n") + else:ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][1]) +" = meta.vote_c"+str(config['hp_info'][str(h)][1]) +"+ 1;}\n\n") + ingress.write(" decision.apply();\n") + + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_hps = np.int(num_classes * (num_classes - 1) / 2) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['f' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["meta.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + for hp in range(num_hps): + Entry["action_params"]["f"+str(f)+"hp"+str(hp)] = Exact_Table['f' + str(f)][idx]["hp "+str(hp)] + Table_entries += [Entry] + + + for idx in Exact_Table['decision']: + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for c in range(num_classes): + Entry["match"]["meta.vote_c"+str(c)] = np.int(Exact_Table['decision'][idx]['c'+str(c)+' vote']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = np.int(Exact_Table['decision'][idx]["class"]) + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.thresh_and_bias" + Entry["match"] = {} + Entry["match"]["meta.flag"] = 1 + Entry["action_name"] = "SwitchIngress.read_thresh_and_bias" + Entry["action_params"] = {} + for hp in range(num_hps): + Entry["action_params"]["thresh_hp"+str(hp)] = np.int(Exact_Table['threshold hp'+str(hp)]) + Entry["action_params"]["bia_hp" + str(hp)] = np.int(Exact_Table['bias hp' + str(hp)]) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_hps = np.int(num_classes * (num_classes - 1) / 2) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Exact_Table['f' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => ") + for hp in range(num_hps): + file.write(str(Exact_Table['f' + str(f)][idx][ "hp " + str(hp)])+" ") + file.write("\n") + file.write("\n") + + + for idx in Exact_Table['decision']: + key_value = int(idx) + file.write("table_add SwitchIngress.decision read_lable ") + for c in range(num_classes): + file.write(str(int(Exact_Table['decision'][idx]['c' + str(c) + ' vote']))+" ") + file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") + + + file.write("table_add SwitchIngress.thresh_and_bias read_thresh_and_bias 1 => ") + for hp in range(num_hps): + file.write(str(np.int(Exact_Table['threshold hp' + str(hp)]))+" ") + for hp in range(num_hps): + file.write(str(np.int(np.int(Exact_Table['bias hp' + str(hp)]))) + " ") + file.write("\n") + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['f" + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['f" + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for h in range(0, config['num_hps']): + if h==0: + tload.write("table['f" + str(f) + "'][key]['hp " + str(h) + "']") + else: + tload.write(", table['f"+str(f)+"'][key]['hp "+str(h)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load thresh_and_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.thresh_and_bias.add_with_read_thresh_and_bias(" + "1, ") + for h in range(0, config['num_hps']): + if h == 0: + tload.write("table['threshold hp" + str(h) + "']") + else: + tload.write(", table['threshold hp" + str(h) + "']") + for h in range(0, config['num_hps']): + tload.write(", table['bias hp" + str(h) + "']") + tload.write(")\n\n") + + tload.write("print('load decision table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for c in range(config['num_classes']): + tload.write("table['decision'][key]['c" + str(c) + " vote'], ") + tload.write(" int(table['decision'][key]['class']))\n") + + + + diff --git a/src/models/SVM/Type_1/readme.md b/src/models/SVM/Type_1/readme.md index 2fe18a5..0668a27 100644 --- a/src/models/SVM/Type_1/readme.md +++ b/src/models/SVM/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for SVM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for SVM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/SVM/Type_1/table_generator.py b/src/models/SVM/Type_1/table_generator.py index febbdee..c25424c 100755 --- a/src/models/SVM/Type_1/table_generator.py +++ b/src/models/SVM/Type_1/table_generator.py @@ -1,322 +1,322 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -import pydotplus -from sklearn.linear_model import SGDClassifier -import math - -import os -import sys -# import grpc -import json -from src.functions.json_encoder import * -from src.functions.normalization import * - - - - -def ten_to_bin(num,count): - num= num.astype(np.int) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num): - if class_num == num_classes: - if np.sum(vote_list) == num_votes: - g_table['decision'][num] = {} - for c in range(num_classes): - g_table['decision'][num]['c'+str(c)+' vote'] = vote_list[c] - g_table['decision'][num]['class'] = vote_list.index(np.max(vote_list)) - num += 1 - return g_table, num - else: - for v in range(num_votes+1): - vote_list[class_num] = v - class_num += 1 - g_table, num = votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num) - class_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_hps = np.int(num_classes * (num_classes - 1) / 2) - - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [max(t_t)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - #SVM instance - SVM = SVC(kernel = 'linear') - SVM.fit(train_X, train_y) - sklearn_y_predict = SVM.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - coe = SVM.coef_ - int = SVM.intercept_ - - - outputfile = 'src/temp/svm.txt' - model = open(outputfile,"w+") - for i in range(len(coe)): - model.write("hyperplane"+str(i)+" = ") - for f in range(num_features): - model.write(str(coe[i][f]) + "x"+str(f+1)+" + ") - model.write(str(int[i])) - model.write(";\n") - model.close() - - # Table without fitting to switch - SVM_separate_table = {} - - value_info = {} - for hp in range(num_hps): - SVM_separate_table["bias hp"+str(hp)]=int[hp] - value_info["hp "+str(hp)] = {} - value_info["hp " + str(hp)]["max"] = int[hp] - value_info["hp " + str(hp)]["min"] = int[hp] - - - - for i,fn in enumerate(feature_names): - - SVM_separate_table[fn] = {} - for feature in range(feature_max[i]+1): - SVM_separate_table[fn][feature] = {} - for hp in range(num_hps): - middle_value = coe[hp][i] * feature - SVM_separate_table[fn][feature]["hp "+str(hp)] = middle_value - if middle_value > value_info["hp " + str(hp)]["max"]: - value_info["hp " + str(hp)]["max"] = middle_value - if middle_value < value_info["hp " + str(hp)]["min"]: - value_info["hp " + str(hp)]["min"] = middle_value - - for hp in range(num_hps): - SVM_separate_table['threshold hp'+str(hp)] = 0 - - # Table fit to switch - scale = np.floor((2**num_bits)/ (value_info["hp " + str(hp)]["max"] - value_info["hp " + str(hp)]["min"])/num_features) - Exact_Table = {} - - print("Generating decision table...", end="") - Exact_Table['decision'] = {} - Exact_Table, _ = votes_to_class(0, np.zeros(num_classes).tolist(), num_hps, num_classes, Exact_Table, 0) - print('Done') - - for hp in range(num_hps): - x = SVM_separate_table["bias hp"+str(hp)] - min_x = value_info["hp " + str(hp)]["min"] - max_x = value_info["hp " + str(hp)]["max"] - - Exact_Table['threshold hp' + str(hp)] = -np.int(scale*((num_features + 1) * min_x)) - Exact_Table["bias hp" + str(hp)] = np.int(scale*(x - min_x)) - - - - for i,fn in enumerate(feature_names): - Exact_Table[fn] = {} - for feature in range(feature_max[i]+1): - Exact_Table[fn][feature] = {} - for hp in range(num_hps): - x = SVM_separate_table[fn][feature]["hp "+str(hp)] - min_x = value_info["hp " + str(hp)]["min"] - max_x = value_info["hp " + str(hp)]["max"] - - Exact_Table[fn][feature]["hp "+str(hp)] = np.int(scale*(x - min_x)) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table,open('Tables/Exact_Table.json','w'),indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['f'+str(f)].keys())] - - thresh_and_bias = '' - for h in range(num_hps): - if h == 0: - thresh_and_bias += str(Exact_Table['threshold hp' + str(h)]) - else: - thresh_and_bias += (', '+ str(Exact_Table['threshold hp' + str(h)])) - for h in range(num_hps): - thresh_and_bias += (', ' + str(Exact_Table['bias hp' + str(h)])) - - hp_info = {} - count = 0 - initial = 0 - while True: - for c in range(num_classes): - if c > initial: - hp_info[count] = [initial, c] - count += 1 - initial += 1 - if initial >= num_classes - 1: - break - - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "SVM" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']["number of hps"] = num_hps - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']['thresh and bias'] = thresh_and_bias - Planter_config['p4 config']['hp_info'] = hp_info - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - hp_info = {} - count = 0 - initial = 0 - while True: - for c in range(num_classes): - if c> initial: - hp_info[count] = [initial,c] - count+=1 - initial+=1 - if initial >= num_classes-1: - break - - - for i in range(np.shape(test_X.values)[0]): - class_vote = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - for hp in range(np.int(num_classes * (num_classes - 1) / 2)): - hp_value = 0 - for f in range(num_features): - hp_value += Exact_Table["f"+str(f)][str(input_feature_value[f])]["hp "+str(hp)] - hp_value += Exact_Table["bias hp"+str(hp)] - if num_classes ==2: - if hp_value>Exact_Table["threshold hp"+str(hp)]: - class_vote[hp_info[hp][1]] += 1 - else: - class_vote[hp_info[hp][0]] += 1 - else: - if hp_value>Exact_Table["threshold hp"+str(hp)]: - class_vote[hp_info[hp][0]] += 1 - else: - class_vote[hp_info[hp][1]] += 1 - - switch_prediction = class_vote.index(np.max(class_vote)) - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - - - print('\nThe accuracy of the match action format of SVM is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +import pydotplus +from sklearn.linear_model import SGDClassifier +import math + +import os +import sys +# import grpc +import json +from src.functions.json_encoder import * +from src.functions.normalization import * + + + + +def ten_to_bin(num,count): + num= num.astype(np.int) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num): + if class_num == num_classes: + if np.sum(vote_list) == num_votes: + g_table['decision'][num] = {} + for c in range(num_classes): + g_table['decision'][num]['c'+str(c)+' vote'] = vote_list[c] + g_table['decision'][num]['class'] = vote_list.index(np.max(vote_list)) + num += 1 + return g_table, num + else: + for v in range(num_votes+1): + vote_list[class_num] = v + class_num += 1 + g_table, num = votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num) + class_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_hps = np.int(num_classes * (num_classes - 1) / 2) + + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [max(t_t)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + #SVM instance + SVM = SVC(kernel = 'linear') + SVM.fit(train_X, train_y) + sklearn_y_predict = SVM.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + coe = SVM.coef_ + int = SVM.intercept_ + + + outputfile = 'src/temp/svm.txt' + model = open(outputfile,"w+") + for i in range(len(coe)): + model.write("hyperplane"+str(i)+" = ") + for f in range(num_features): + model.write(str(coe[i][f]) + "x"+str(f+1)+" + ") + model.write(str(int[i])) + model.write(";\n") + model.close() + + # Table without fitting to switch + SVM_separate_table = {} + + value_info = {} + for hp in range(num_hps): + SVM_separate_table["bias hp"+str(hp)]=int[hp] + value_info["hp "+str(hp)] = {} + value_info["hp " + str(hp)]["max"] = int[hp] + value_info["hp " + str(hp)]["min"] = int[hp] + + + + for i,fn in enumerate(feature_names): + + SVM_separate_table[fn] = {} + for feature in range(feature_max[i]+1): + SVM_separate_table[fn][feature] = {} + for hp in range(num_hps): + middle_value = coe[hp][i] * feature + SVM_separate_table[fn][feature]["hp "+str(hp)] = middle_value + if middle_value > value_info["hp " + str(hp)]["max"]: + value_info["hp " + str(hp)]["max"] = middle_value + if middle_value < value_info["hp " + str(hp)]["min"]: + value_info["hp " + str(hp)]["min"] = middle_value + + for hp in range(num_hps): + SVM_separate_table['threshold hp'+str(hp)] = 0 + + # Table fit to switch + scale = np.floor((2**num_bits)/ (value_info["hp " + str(hp)]["max"] - value_info["hp " + str(hp)]["min"])/num_features) + Exact_Table = {} + + print("Generating decision table...", end="") + Exact_Table['decision'] = {} + Exact_Table, _ = votes_to_class(0, np.zeros(num_classes).tolist(), num_hps, num_classes, Exact_Table, 0) + print('Done') + + for hp in range(num_hps): + x = SVM_separate_table["bias hp"+str(hp)] + min_x = value_info["hp " + str(hp)]["min"] + max_x = value_info["hp " + str(hp)]["max"] + + Exact_Table['threshold hp' + str(hp)] = -np.int(scale*((num_features + 1) * min_x)) + Exact_Table["bias hp" + str(hp)] = np.int(scale*(x - min_x)) + + + + for i,fn in enumerate(feature_names): + Exact_Table[fn] = {} + for feature in range(feature_max[i]+1): + Exact_Table[fn][feature] = {} + for hp in range(num_hps): + x = SVM_separate_table[fn][feature]["hp "+str(hp)] + min_x = value_info["hp " + str(hp)]["min"] + max_x = value_info["hp " + str(hp)]["max"] + + Exact_Table[fn][feature]["hp "+str(hp)] = np.int(scale*(x - min_x)) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table,open('Tables/Exact_Table.json','w'),indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['f'+str(f)].keys())] + + thresh_and_bias = '' + for h in range(num_hps): + if h == 0: + thresh_and_bias += str(Exact_Table['threshold hp' + str(h)]) + else: + thresh_and_bias += (', '+ str(Exact_Table['threshold hp' + str(h)])) + for h in range(num_hps): + thresh_and_bias += (', ' + str(Exact_Table['bias hp' + str(h)])) + + hp_info = {} + count = 0 + initial = 0 + while True: + for c in range(num_classes): + if c > initial: + hp_info[count] = [initial, c] + count += 1 + initial += 1 + if initial >= num_classes - 1: + break + + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "SVM" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']["number of hps"] = num_hps + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']['thresh and bias'] = thresh_and_bias + Planter_config['p4 config']['hp_info'] = hp_info + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + hp_info = {} + count = 0 + initial = 0 + while True: + for c in range(num_classes): + if c> initial: + hp_info[count] = [initial,c] + count+=1 + initial+=1 + if initial >= num_classes-1: + break + + + for i in range(np.shape(test_X.values)[0]): + class_vote = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + for hp in range(np.int(num_classes * (num_classes - 1) / 2)): + hp_value = 0 + for f in range(num_features): + hp_value += Exact_Table["f"+str(f)][str(input_feature_value[f])]["hp "+str(hp)] + hp_value += Exact_Table["bias hp"+str(hp)] + if num_classes ==2: + if hp_value>Exact_Table["threshold hp"+str(hp)]: + class_vote[hp_info[hp][1]] += 1 + else: + class_vote[hp_info[hp][0]] += 1 + else: + if hp_value>Exact_Table["threshold hp"+str(hp)]: + class_vote[hp_info[hp][0]] += 1 + else: + class_vote[hp_info[hp][1]] += 1 + + switch_prediction = class_vote.index(np.max(class_vote)) + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + + + print('\nThe accuracy of the match action format of SVM is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/SVM/Type_LB/dedicated_p4.py b/src/models/SVM/Type_LB/dedicated_p4.py index cc925f7..282e4e9 100755 --- a/src/models/SVM/Type_LB/dedicated_p4.py +++ b/src/models/SVM/Type_LB/dedicated_p4.py @@ -1,349 +1,349 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_classes'] = config_file["number of classes"] - config['num_bits'] = Planter_config['p4 config']["action data bits"] - config['num_hps'] = Planter_config['p4 config']["number of hps"] - config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] - config['thresh_and_bias'] = Planter_config['p4 config']['thresh and bias'] - config['hp_info'] = Planter_config['p4 config']['hp_info'] - - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for f in range(0, config['num_features']): - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) +"_hp" + str(h)+ ";\n") - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> bias_hp" + str(h) + ";\n") - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> threshold_hp" + str(h) + ";\n") - for h in range(0, config['num_hps']): - headers.write(" bit<" + str(config['num_bits']) + "> result_hp" + str(h) + ";\n") - for c in range(0, config['num_classes']): - headers.write(" bit<4> vote_c" + str(c) + ";\n") - - # headers.write("}\n\n") - - - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for f in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(f)+'(') - for h in range(0, config['num_hps']): - if h==0: - ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "hp" + str(h)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"hp"+str(h)) - ingress.write("){\n") - for h in range(0, config['num_hps']): - ingress.write(" meta.result_hp" + str(h)+" = meta.result_hp" + str(h)+" + f"+str(f)+"hp"+str(h) +";\n") - ingress.write(" }\n\n") - - - - for f in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(f) + " {\n" - " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" - " actions = {\n" - " extract_feature" + str(f) + "();\n" - " NoAction;\n" - " }\n" - " size = " + str( config['f_tbl_len'][f]) + ";\n" - " default_action = NoAction;\n" - " }\n\n") - - - ingress.write(" action read_thresh_and_bias(") - for h in range(0, config['num_hps']): - if h==0: - ingress.write("bit<" + str(config['num_bits']) + "> thresh_hp" + str(h)) - else: - ingress.write(", bit<" + str(config['num_bits']) + "> thresh_hp"+str(h)) - for h in range(0, config['num_hps']): - ingress.write(", bit<" + str(config['num_bits']) + "> bia_hp" + str(h)) - ingress.write("){\n") - for h in range(0, config['num_hps']): - ingress.write(" meta.threshold_hp" + str(h)+ " = thresh_hp"+str(h) +";\n") - for h in range(0, config['num_hps']): - ingress.write(" meta.result_hp" + str(h)+ " = bia_hp"+str(h) +";\n") - ingress.write(" }\n\n") - - - - ingress.write(" table thresh_and_bias {\n" - " key = {hdr.Planter.ver:exact;}\n" - " actions={read_thresh_and_bias; NoAction;}\n" - " default_action = NoAction;\n" - " size = 1;\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - - ingress.write(" table decision {\n" - " key = {\n") - for c in range(config['num_classes']): - ingress.write(" meta.vote_c"+str(c)+": exact;\n") - ingress.write(" }\n" - " actions={read_lable; NoAction;}\n" - " default_action = NoAction;\n" - " size = "+str(config['num_classes']**config['num_hps'])+";\n" - " }\n\n") - - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for c in range(0, config['num_classes']): - ingress.write(" meta.vote_c"+str(c)+"=0;\n") - - ingress.write(" thresh_and_bias.apply();\n") - for f in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(f) + ".apply();\n") - - for h in range(0, config['num_hps']): - ingress.write(" meta.result_hp" + str(h) + " = meta.threshold_hp"+str(h)+" - meta.result_hp"+str(h)+ ";\n") - for h in range(0, config['num_hps']): - ingress.write(" if(meta.result_hp" + str(h) + " & 0b1"+"0"*(config['num_bits']-1)+"==0){\n") - if config['num_classes']!=2: - ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][1]) + " = meta.vote_c" + str(config['hp_info'][str(h)][1]) + "+ 1;\n") - else: ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][0]) + " = meta.vote_c" + str(config['hp_info'][str(h)][0]) + "+ 1;\n") - ingress.write(" }else{") - if config['num_classes']!=2: - ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][0]) +" = meta.vote_c"+str(config['hp_info'][str(h)][0]) +"+ 1;}\n\n") - else:ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][1]) +" = meta.vote_c"+str(config['hp_info'][str(h)][1]) +"+ 1;}\n\n") - ingress.write(" decision.apply();\n") - - - -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### -def create_tables(Planter_config): - Table_entries = [] - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_hps = np.int(num_classes * (num_classes - 1) / 2) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - for f in range(num_features): - for idx in Exact_Table['f' + str(f)]: - key_value = int(idx) - Entry = {} - Entry["table"] = "SwitchIngress.lookup_feature"+str(f) - Entry["match"] = {} - Entry["match"]["hdr.Planter.feature"+str(f)] = key_value - Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) - Entry["action_params"] = {} - for hp in range(num_hps): - Entry["action_params"]["f"+str(f)+"hp"+str(hp)] = Exact_Table['f' + str(f)][idx]["hp "+str(hp)] - Table_entries += [Entry] - - - for idx in Exact_Table['decision']: - Entry = {} - Entry["table"] = "SwitchIngress.decision" - Entry["match"] = {} - for c in range(num_classes): - Entry["match"]["meta.vote_c"+str(c)] = np.int(Exact_Table['decision'][idx]['c'+str(c)+' vote']) - Entry["action_name"] = "SwitchIngress.read_lable" - Entry["action_params"] = {} - Entry["action_params"]["label"] = np.int(Exact_Table['decision'][idx]["class"]) - Table_entries += [Entry] - - Entry = {} - Entry["table"] = "SwitchIngress.thresh_and_bias" - Entry["match"] = {} - Entry["match"]["hdr.Planter.ver"] = 1 - Entry["action_name"] = "SwitchIngress.read_thresh_and_bias" - Entry["action_params"] = {} - for hp in range(num_hps): - Entry["action_params"]["thresh_hp"+str(hp)] = np.int(Exact_Table['threshold hp'+str(hp)]) - Entry["action_params"]["bia_hp" + str(hp)] = np.int(Exact_Table['bias hp' + str(hp)]) - Table_entries += [Entry] - - Runtime = {} - Runtime["table_entries"] = Table_entries - json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) - # print('BMv2 runtime file is partly generated') - - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_hps = np.int(num_classes * (num_classes - 1) / 2) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Exact_Table['f' + str(f)]: - key = int(idx) - - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+" => ") - for hp in range(num_hps): - file.write(str(Exact_Table['f' + str(f)][idx][ "hp " + str(hp)])+" ") - file.write("\n") - file.write("\n") - - - for idx in Exact_Table['decision']: - key_value = int(idx) - file.write("table_add SwitchIngress.decision read_lable ") - for c in range(num_classes): - file.write(str(int(Exact_Table['decision'][idx]['c' + str(c) + ' vote']))+" ") - file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") - - - file.write("table_add SwitchIngress.thresh_and_bias read_thresh_and_bias 1 => ") - for hp in range(num_hps): - file.write(str(np.int(Exact_Table['threshold hp' + str(hp)]))+" ") - for hp in range(num_hps): - file.write(str(np.int(np.int(Exact_Table['bias hp' + str(hp)]))) + " ") - file.write("\n") - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - create_tables(Planter_config) - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" - "import os\n" - "import binascii\n" - "import sys\n" + - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" - "os.chdir('" + work_root + "')\n")) + - "print('working dir: ' + os.getcwd())\n" - "table = json.load(open('./Tables/" + fjson + "','r'))\n" - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for f in range(0, config['num_features']): - tload.write("print('load feature " + str(f) + " table with',len(table['f" + str(f) + "'].keys()),'entries')\n" - "for k in range(len(table['f" + str(f) + "'].keys())):\n") - tload.write(" key = str(k)\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(f) + - ".add_with_extract_feature" + str(f) + - "(int(key), ") - for h in range(0, config['num_hps']): - if h==0: - tload.write("table['f" + str(f) + "'][key]['hp " + str(h) + "']") - else: - tload.write(", table['f"+str(f)+"'][key]['hp "+str(h)+"']") - tload.write(")\n\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + - "'][key][1],table['feature " + str(f) + - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - - - tload.write("print('load thresh_and_bias table with 1 entries')\n") - tload.write((config['debug_load_table'] * "# ") + - "Ingress.thresh_and_bias.add_with_read_thresh_and_bias(" - "1, ") - for h in range(0, config['num_hps']): - if h == 0: - tload.write("table['threshold hp" + str(h) + "']") - else: - tload.write(", table['threshold hp" + str(h) + "']") - for h in range(0, config['num_hps']): - tload.write(", table['bias hp" + str(h) + "']") - tload.write(")\n\n") - - tload.write("print('load decision table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for c in range(config['num_classes']): - tload.write("table['decision'][key]['c" + str(c) + " vote'], ") - tload.write(" int(table['decision'][key]['class']))\n") - - - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_classes'] = config_file["number of classes"] + config['num_bits'] = Planter_config['p4 config']["action data bits"] + config['num_hps'] = Planter_config['p4 config']["number of hps"] + config['f_tbl_len'] = Planter_config['p4 config']["feature tbl len"] + config['thresh_and_bias'] = Planter_config['p4 config']['thresh and bias'] + config['hp_info'] = Planter_config['p4 config']['hp_info'] + + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for f in range(0, config['num_features']): + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> f" + str(f) +"_hp" + str(h)+ ";\n") + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> bias_hp" + str(h) + ";\n") + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> threshold_hp" + str(h) + ";\n") + for h in range(0, config['num_hps']): + headers.write(" bit<" + str(config['num_bits']) + "> result_hp" + str(h) + ";\n") + for c in range(0, config['num_classes']): + headers.write(" bit<4> vote_c" + str(c) + ";\n") + + # headers.write("}\n\n") + + + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for f in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(f)+'(') + for h in range(0, config['num_hps']): + if h==0: + ingress.write("bit<" + str(config['num_bits']) + "> f" + str(f) + "hp" + str(h)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> f"+str(f)+"hp"+str(h)) + ingress.write("){\n") + for h in range(0, config['num_hps']): + ingress.write(" meta.result_hp" + str(h)+" = meta.result_hp" + str(h)+" + f"+str(f)+"hp"+str(h) +";\n") + ingress.write(" }\n\n") + + + + for f in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(f) + " {\n" + " key = { hdr.Planter.feature" + str(f) + ":exact; }\n" + " actions = {\n" + " extract_feature" + str(f) + "();\n" + " NoAction;\n" + " }\n" + " size = " + str( config['f_tbl_len'][f]) + ";\n" + " default_action = NoAction;\n" + " }\n\n") + + + ingress.write(" action read_thresh_and_bias(") + for h in range(0, config['num_hps']): + if h==0: + ingress.write("bit<" + str(config['num_bits']) + "> thresh_hp" + str(h)) + else: + ingress.write(", bit<" + str(config['num_bits']) + "> thresh_hp"+str(h)) + for h in range(0, config['num_hps']): + ingress.write(", bit<" + str(config['num_bits']) + "> bia_hp" + str(h)) + ingress.write("){\n") + for h in range(0, config['num_hps']): + ingress.write(" meta.threshold_hp" + str(h)+ " = thresh_hp"+str(h) +";\n") + for h in range(0, config['num_hps']): + ingress.write(" meta.result_hp" + str(h)+ " = bia_hp"+str(h) +";\n") + ingress.write(" }\n\n") + + + + ingress.write(" table thresh_and_bias {\n" + " key = {hdr.Planter.ver:exact;}\n" + " actions={read_thresh_and_bias; NoAction;}\n" + " default_action = NoAction;\n" + " size = 1;\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + + ingress.write(" table decision {\n" + " key = {\n") + for c in range(config['num_classes']): + ingress.write(" meta.vote_c"+str(c)+": exact;\n") + ingress.write(" }\n" + " actions={read_lable; NoAction;}\n" + " default_action = NoAction;\n" + " size = "+str(config['num_classes']**config['num_hps'])+";\n" + " }\n\n") + + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for c in range(0, config['num_classes']): + ingress.write(" meta.vote_c"+str(c)+"=0;\n") + + ingress.write(" thresh_and_bias.apply();\n") + for f in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(f) + ".apply();\n") + + for h in range(0, config['num_hps']): + ingress.write(" meta.result_hp" + str(h) + " = meta.threshold_hp"+str(h)+" - meta.result_hp"+str(h)+ ";\n") + for h in range(0, config['num_hps']): + ingress.write(" if(meta.result_hp" + str(h) + " & 0b1"+"0"*(config['num_bits']-1)+"==0){\n") + if config['num_classes']!=2: + ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][1]) + " = meta.vote_c" + str(config['hp_info'][str(h)][1]) + "+ 1;\n") + else: ingress.write( " meta.vote_c" + str(config['hp_info'][str(h)][0]) + " = meta.vote_c" + str(config['hp_info'][str(h)][0]) + "+ 1;\n") + ingress.write(" }else{") + if config['num_classes']!=2: + ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][0]) +" = meta.vote_c"+str(config['hp_info'][str(h)][0]) +"+ 1;}\n\n") + else:ingress.write("meta.vote_c"+str(config['hp_info'][str(h)][1]) +" = meta.vote_c"+str(config['hp_info'][str(h)][1]) +"+ 1;}\n\n") + ingress.write(" decision.apply();\n") + + + +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### +def create_tables(Planter_config): + Table_entries = [] + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_hps = np.int(num_classes * (num_classes - 1) / 2) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + for f in range(num_features): + for idx in Exact_Table['f' + str(f)]: + key_value = int(idx) + Entry = {} + Entry["table"] = "SwitchIngress.lookup_feature"+str(f) + Entry["match"] = {} + Entry["match"]["hdr.Planter.feature"+str(f)] = key_value + Entry["action_name"] = "SwitchIngress.extract_feature"+str(f) + Entry["action_params"] = {} + for hp in range(num_hps): + Entry["action_params"]["f"+str(f)+"hp"+str(hp)] = Exact_Table['f' + str(f)][idx]["hp "+str(hp)] + Table_entries += [Entry] + + + for idx in Exact_Table['decision']: + Entry = {} + Entry["table"] = "SwitchIngress.decision" + Entry["match"] = {} + for c in range(num_classes): + Entry["match"]["meta.vote_c"+str(c)] = np.int(Exact_Table['decision'][idx]['c'+str(c)+' vote']) + Entry["action_name"] = "SwitchIngress.read_lable" + Entry["action_params"] = {} + Entry["action_params"]["label"] = np.int(Exact_Table['decision'][idx]["class"]) + Table_entries += [Entry] + + Entry = {} + Entry["table"] = "SwitchIngress.thresh_and_bias" + Entry["match"] = {} + Entry["match"]["hdr.Planter.ver"] = 1 + Entry["action_name"] = "SwitchIngress.read_thresh_and_bias" + Entry["action_params"] = {} + for hp in range(num_hps): + Entry["action_params"]["thresh_hp"+str(hp)] = np.int(Exact_Table['threshold hp'+str(hp)]) + Entry["action_params"]["bia_hp" + str(hp)] = np.int(Exact_Table['bias hp' + str(hp)]) + Table_entries += [Entry] + + Runtime = {} + Runtime["table_entries"] = Table_entries + json.dump(Runtime, open('Tables/Runtime.json', 'w'), indent=4) + # print('BMv2 runtime file is partly generated') + + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_hps = np.int(num_classes * (num_classes - 1) / 2) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Exact_Table['f' + str(f)]: + key = int(idx) + + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+" => ") + for hp in range(num_hps): + file.write(str(Exact_Table['f' + str(f)][idx][ "hp " + str(hp)])+" ") + file.write("\n") + file.write("\n") + + + for idx in Exact_Table['decision']: + key_value = int(idx) + file.write("table_add SwitchIngress.decision read_lable ") + for c in range(num_classes): + file.write(str(int(Exact_Table['decision'][idx]['c' + str(c) + ' vote']))+" ") + file.write("=> "+str(Exact_Table['decision'][idx]['class'])+"\n") + + + file.write("table_add SwitchIngress.thresh_and_bias read_thresh_and_bias 1 => ") + for hp in range(num_hps): + file.write(str(np.int(Exact_Table['threshold hp' + str(hp)]))+" ") + for hp in range(num_hps): + file.write(str(np.int(np.int(Exact_Table['bias hp' + str(hp)]))) + " ") + file.write("\n") + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + create_tables(Planter_config) + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" + "import os\n" + "import binascii\n" + "import sys\n" + + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" + "os.chdir('" + work_root + "')\n")) + + "print('working dir: ' + os.getcwd())\n" + "table = json.load(open('./Tables/" + fjson + "','r'))\n" + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for f in range(0, config['num_features']): + tload.write("print('load feature " + str(f) + " table with',len(table['f" + str(f) + "'].keys()),'entries')\n" + "for k in range(len(table['f" + str(f) + "'].keys())):\n") + tload.write(" key = str(k)\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(f) + + ".add_with_extract_feature" + str(f) + + "(int(key), ") + for h in range(0, config['num_hps']): + if h==0: + tload.write("table['f" + str(f) + "'][key]['hp " + str(h) + "']") + else: + tload.write(", table['f"+str(f)+"'][key]['hp "+str(h)+"']") + tload.write(")\n\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str(f) + + "'][key][1],table['feature " + str(f) + + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + + + tload.write("print('load thresh_and_bias table with 1 entries')\n") + tload.write((config['debug_load_table'] * "# ") + + "Ingress.thresh_and_bias.add_with_read_thresh_and_bias(" + "1, ") + for h in range(0, config['num_hps']): + if h == 0: + tload.write("table['threshold hp" + str(h) + "']") + else: + tload.write(", table['threshold hp" + str(h) + "']") + for h in range(0, config['num_hps']): + tload.write(", table['bias hp" + str(h) + "']") + tload.write(")\n\n") + + tload.write("print('load decision table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for c in range(config['num_classes']): + tload.write("table['decision'][key]['c" + str(c) + " vote'], ") + tload.write(" int(table['decision'][key]['class']))\n") + + + + diff --git a/src/models/SVM/Type_LB/readme.md b/src/models/SVM/Type_LB/readme.md index 2fe18a5..0668a27 100644 --- a/src/models/SVM/Type_LB/readme.md +++ b/src/models/SVM/Type_LB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for SVM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for SVM. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/SVM/Type_LB/table_generator.py b/src/models/SVM/Type_LB/table_generator.py index febbdee..c25424c 100755 --- a/src/models/SVM/Type_LB/table_generator.py +++ b/src/models/SVM/Type_LB/table_generator.py @@ -1,322 +1,322 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -import time - -from sklearn.cluster import KMeans -from sklearn.metrics import accuracy_score -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import train_test_split -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn.metrics import * -from sklearn.svm import SVC -from sklearn.svm import LinearSVC -from sklearn.naive_bayes import GaussianNB -import pydotplus -from sklearn.linear_model import SGDClassifier -import math - -import os -import sys -# import grpc -import json -from src.functions.json_encoder import * -from src.functions.normalization import * - - - - -def ten_to_bin(num,count): - num= num.astype(np.int) - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - - -def votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num): - if class_num == num_classes: - if np.sum(vote_list) == num_votes: - g_table['decision'][num] = {} - for c in range(num_classes): - g_table['decision'][num]['c'+str(c)+' vote'] = vote_list[c] - g_table['decision'][num]['class'] = vote_list.index(np.max(vote_list)) - num += 1 - return g_table, num - else: - for v in range(num_votes+1): - vote_list[class_num] = v - class_num += 1 - g_table, num = votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num) - class_num -= 1 - return g_table, num - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') - Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) - - - num_bits = Planter_config['model config']['number of bits'] - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_hps = np.int(num_classes * (num_classes - 1) / 2) - - - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [max(t_t)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - #SVM instance - SVM = SVC(kernel = 'linear') - SVM.fit(train_X, train_y) - sklearn_y_predict = SVM.predict(test_X) - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - coe = SVM.coef_ - int = SVM.intercept_ - - - outputfile = 'src/temp/svm.txt' - model = open(outputfile,"w+") - for i in range(len(coe)): - model.write("hyperplane"+str(i)+" = ") - for f in range(num_features): - model.write(str(coe[i][f]) + "x"+str(f+1)+" + ") - model.write(str(int[i])) - model.write(";\n") - model.close() - - # Table without fitting to switch - SVM_separate_table = {} - - value_info = {} - for hp in range(num_hps): - SVM_separate_table["bias hp"+str(hp)]=int[hp] - value_info["hp "+str(hp)] = {} - value_info["hp " + str(hp)]["max"] = int[hp] - value_info["hp " + str(hp)]["min"] = int[hp] - - - - for i,fn in enumerate(feature_names): - - SVM_separate_table[fn] = {} - for feature in range(feature_max[i]+1): - SVM_separate_table[fn][feature] = {} - for hp in range(num_hps): - middle_value = coe[hp][i] * feature - SVM_separate_table[fn][feature]["hp "+str(hp)] = middle_value - if middle_value > value_info["hp " + str(hp)]["max"]: - value_info["hp " + str(hp)]["max"] = middle_value - if middle_value < value_info["hp " + str(hp)]["min"]: - value_info["hp " + str(hp)]["min"] = middle_value - - for hp in range(num_hps): - SVM_separate_table['threshold hp'+str(hp)] = 0 - - # Table fit to switch - scale = np.floor((2**num_bits)/ (value_info["hp " + str(hp)]["max"] - value_info["hp " + str(hp)]["min"])/num_features) - Exact_Table = {} - - print("Generating decision table...", end="") - Exact_Table['decision'] = {} - Exact_Table, _ = votes_to_class(0, np.zeros(num_classes).tolist(), num_hps, num_classes, Exact_Table, 0) - print('Done') - - for hp in range(num_hps): - x = SVM_separate_table["bias hp"+str(hp)] - min_x = value_info["hp " + str(hp)]["min"] - max_x = value_info["hp " + str(hp)]["max"] - - Exact_Table['threshold hp' + str(hp)] = -np.int(scale*((num_features + 1) * min_x)) - Exact_Table["bias hp" + str(hp)] = np.int(scale*(x - min_x)) - - - - for i,fn in enumerate(feature_names): - Exact_Table[fn] = {} - for feature in range(feature_max[i]+1): - Exact_Table[fn][feature] = {} - for hp in range(num_hps): - x = SVM_separate_table[fn][feature]["hp "+str(hp)] - min_x = value_info["hp " + str(hp)]["min"] - max_x = value_info["hp " + str(hp)]["max"] - - Exact_Table[fn][feature]["hp "+str(hp)] = np.int(scale*(x - min_x)) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Exact_Table,open('Tables/Exact_Table.json','w'),indent=4) - print('Exact_Table is generated') - - feature_tbl_len = [] - for f in range(num_features): - feature_tbl_len += [len(Exact_Table['f'+str(f)].keys())] - - thresh_and_bias = '' - for h in range(num_hps): - if h == 0: - thresh_and_bias += str(Exact_Table['threshold hp' + str(h)]) - else: - thresh_and_bias += (', '+ str(Exact_Table['threshold hp' + str(h)])) - for h in range(num_hps): - thresh_and_bias += (', ' + str(Exact_Table['bias hp' + str(h)])) - - hp_info = {} - count = 0 - initial = 0 - while True: - for c in range(num_classes): - if c > initial: - hp_info[count] = [initial, c] - count += 1 - initial += 1 - if initial >= num_classes - 1: - break - - - Planter_config['p4 config'] = {} - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "SVM" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["action data bits"] = num_bits - Planter_config['p4 config']["number of hps"] = num_hps - Planter_config['p4 config']["feature tbl len"] = feature_tbl_len - Planter_config['p4 config']['table name'] = 'Exact_Table.json' - Planter_config['p4 config']['thresh and bias'] = thresh_and_bias - Planter_config['p4 config']['hp_info'] = hp_info - - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - return sklearn_y_predict.tolist() - - -def test_tables(sklearn_test_y, test_X, test_y): - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - print("Test the generated table") - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - - hp_info = {} - count = 0 - initial = 0 - while True: - for c in range(num_classes): - if c> initial: - hp_info[count] = [initial,c] - count+=1 - initial+=1 - if initial >= num_classes-1: - break - - - for i in range(np.shape(test_X.values)[0]): - class_vote = np.zeros(num_classes).tolist() - input_feature_value = test_X.values[i] - for hp in range(np.int(num_classes * (num_classes - 1) / 2)): - hp_value = 0 - for f in range(num_features): - hp_value += Exact_Table["f"+str(f)][str(input_feature_value[f])]["hp "+str(hp)] - hp_value += Exact_Table["bias hp"+str(hp)] - if num_classes ==2: - if hp_value>Exact_Table["threshold hp"+str(hp)]: - class_vote[hp_info[hp][1]] += 1 - else: - class_vote[hp_info[hp][0]] += 1 - else: - if hp_value>Exact_Table["threshold hp"+str(hp)]: - class_vote[hp_info[hp][0]] += 1 - else: - class_vote[hp_info[hp][1]] += 1 - - switch_prediction = class_vote.index(np.max(class_vote)) - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 10 == 0 and i != 0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), - end="") - - - - print('\nThe accuracy of the match action format of SVM is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +import time + +from sklearn.cluster import KMeans +from sklearn.metrics import accuracy_score +from sklearn.preprocessing import LabelEncoder +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn.metrics import * +from sklearn.svm import SVC +from sklearn.svm import LinearSVC +from sklearn.naive_bayes import GaussianNB +import pydotplus +from sklearn.linear_model import SGDClassifier +import math + +import os +import sys +# import grpc +import json +from src.functions.json_encoder import * +from src.functions.normalization import * + + + + +def ten_to_bin(num,count): + num= num.astype(np.int) + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + + +def votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num): + if class_num == num_classes: + if np.sum(vote_list) == num_votes: + g_table['decision'][num] = {} + for c in range(num_classes): + g_table['decision'][num]['c'+str(c)+' vote'] = vote_list[c] + g_table['decision'][num]['class'] = vote_list.index(np.max(vote_list)) + num += 1 + return g_table, num + else: + for v in range(num_votes+1): + vote_list[class_num] = v + class_num += 1 + g_table, num = votes_to_class(class_num, vote_list, num_votes, num_classes, g_table, num) + class_num -= 1 + return g_table, num + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + Planter_config['model config']['number of bits'] = np.int(input('- Number of bits for each action data? (default = 16) ') or '16') + Planter_config['model config']['number of classes'] = np.int(np.max(train_y) + 1) + + + num_bits = Planter_config['model config']['number of bits'] + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_hps = np.int(num_classes * (num_classes - 1) / 2) + + + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [max(t_t)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + #SVM instance + SVM = SVC(kernel = 'linear') + SVM.fit(train_X, train_y) + sklearn_y_predict = SVM.predict(test_X) + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + coe = SVM.coef_ + int = SVM.intercept_ + + + outputfile = 'src/temp/svm.txt' + model = open(outputfile,"w+") + for i in range(len(coe)): + model.write("hyperplane"+str(i)+" = ") + for f in range(num_features): + model.write(str(coe[i][f]) + "x"+str(f+1)+" + ") + model.write(str(int[i])) + model.write(";\n") + model.close() + + # Table without fitting to switch + SVM_separate_table = {} + + value_info = {} + for hp in range(num_hps): + SVM_separate_table["bias hp"+str(hp)]=int[hp] + value_info["hp "+str(hp)] = {} + value_info["hp " + str(hp)]["max"] = int[hp] + value_info["hp " + str(hp)]["min"] = int[hp] + + + + for i,fn in enumerate(feature_names): + + SVM_separate_table[fn] = {} + for feature in range(feature_max[i]+1): + SVM_separate_table[fn][feature] = {} + for hp in range(num_hps): + middle_value = coe[hp][i] * feature + SVM_separate_table[fn][feature]["hp "+str(hp)] = middle_value + if middle_value > value_info["hp " + str(hp)]["max"]: + value_info["hp " + str(hp)]["max"] = middle_value + if middle_value < value_info["hp " + str(hp)]["min"]: + value_info["hp " + str(hp)]["min"] = middle_value + + for hp in range(num_hps): + SVM_separate_table['threshold hp'+str(hp)] = 0 + + # Table fit to switch + scale = np.floor((2**num_bits)/ (value_info["hp " + str(hp)]["max"] - value_info["hp " + str(hp)]["min"])/num_features) + Exact_Table = {} + + print("Generating decision table...", end="") + Exact_Table['decision'] = {} + Exact_Table, _ = votes_to_class(0, np.zeros(num_classes).tolist(), num_hps, num_classes, Exact_Table, 0) + print('Done') + + for hp in range(num_hps): + x = SVM_separate_table["bias hp"+str(hp)] + min_x = value_info["hp " + str(hp)]["min"] + max_x = value_info["hp " + str(hp)]["max"] + + Exact_Table['threshold hp' + str(hp)] = -np.int(scale*((num_features + 1) * min_x)) + Exact_Table["bias hp" + str(hp)] = np.int(scale*(x - min_x)) + + + + for i,fn in enumerate(feature_names): + Exact_Table[fn] = {} + for feature in range(feature_max[i]+1): + Exact_Table[fn][feature] = {} + for hp in range(num_hps): + x = SVM_separate_table[fn][feature]["hp "+str(hp)] + min_x = value_info["hp " + str(hp)]["min"] + max_x = value_info["hp " + str(hp)]["max"] + + Exact_Table[fn][feature]["hp "+str(hp)] = np.int(scale*(x - min_x)) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Exact_Table,open('Tables/Exact_Table.json','w'),indent=4) + print('Exact_Table is generated') + + feature_tbl_len = [] + for f in range(num_features): + feature_tbl_len += [len(Exact_Table['f'+str(f)].keys())] + + thresh_and_bias = '' + for h in range(num_hps): + if h == 0: + thresh_and_bias += str(Exact_Table['threshold hp' + str(h)]) + else: + thresh_and_bias += (', '+ str(Exact_Table['threshold hp' + str(h)])) + for h in range(num_hps): + thresh_and_bias += (', ' + str(Exact_Table['bias hp' + str(h)])) + + hp_info = {} + count = 0 + initial = 0 + while True: + for c in range(num_classes): + if c > initial: + hp_info[count] = [initial, c] + count += 1 + initial += 1 + if initial >= num_classes - 1: + break + + + Planter_config['p4 config'] = {} + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "SVM" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["action data bits"] = num_bits + Planter_config['p4 config']["number of hps"] = num_hps + Planter_config['p4 config']["feature tbl len"] = feature_tbl_len + Planter_config['p4 config']['table name'] = 'Exact_Table.json' + Planter_config['p4 config']['thresh and bias'] = thresh_and_bias + Planter_config['p4 config']['hp_info'] = hp_info + + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + return sklearn_y_predict.tolist() + + +def test_tables(sklearn_test_y, test_X, test_y): + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + print("Test the generated table") + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + + hp_info = {} + count = 0 + initial = 0 + while True: + for c in range(num_classes): + if c> initial: + hp_info[count] = [initial,c] + count+=1 + initial+=1 + if initial >= num_classes-1: + break + + + for i in range(np.shape(test_X.values)[0]): + class_vote = np.zeros(num_classes).tolist() + input_feature_value = test_X.values[i] + for hp in range(np.int(num_classes * (num_classes - 1) / 2)): + hp_value = 0 + for f in range(num_features): + hp_value += Exact_Table["f"+str(f)][str(input_feature_value[f])]["hp "+str(hp)] + hp_value += Exact_Table["bias hp"+str(hp)] + if num_classes ==2: + if hp_value>Exact_Table["threshold hp"+str(hp)]: + class_vote[hp_info[hp][1]] += 1 + else: + class_vote[hp_info[hp][0]] += 1 + else: + if hp_value>Exact_Table["threshold hp"+str(hp)]: + class_vote[hp_info[hp][0]] += 1 + else: + class_vote[hp_info[hp][1]] += 1 + + switch_prediction = class_vote.index(np.max(class_vote)) + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 10 == 0 and i != 0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.4}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i]), accuracy_score(sklearn_test_y[:i], test_y[:i])), + end="") + + + + print('\nThe accuracy of the match action format of SVM is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["feature tbl len"]) ) + + diff --git a/src/models/SVM/readme.md b/src/models/SVM/readme.md index bf3085c..0d19c49 100644 --- a/src/models/SVM/readme.md +++ b/src/models/SVM/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported SVM. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported SVM. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_1/dedicated_p4.py b/src/models/XGB/Type_1/dedicated_p4.py index 9d84adc..a4540ca 100755 --- a/src/models/XGB/Type_1/dedicated_p4.py +++ b/src/models/XGB/Type_1/dedicated_p4.py @@ -1,312 +1,312 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/XGB/Type_1/readme.md b/src/models/XGB/Type_1/readme.md index b91f444..50d6d54 100644 --- a/src/models/XGB/Type_1/readme.md +++ b/src/models/XGB/Type_1/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_1/table_generator.py b/src/models/XGB/Type_1/table_generator.py index 7e5c8f4..bd46e8d 100755 --- a/src/models/XGB/Type_1/table_generator.py +++ b/src/models/XGB/Type_1/table_generator.py @@ -1,551 +1,551 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn import tree -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import classification_report -import xgboost as xgb -import copy -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import math -import time -import re -import json - -from sklearn.metrics import * - - - -def map(value): - value = value - return value - -def get_path(model, conditions, path, num, leaf_info, tree_index): - if 'children' in model.keys(): - conditions_yes = copy.deepcopy(conditions) - conditions_no = copy.deepcopy(conditions) - if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: - conditions_yes[model["split"]][1] = map(model["split_condition"])-1 - if conditions_no[model["split"]][0] < map(model["split_condition"]) : - conditions_no[model["split"]][0] = map(model["split_condition"]) - for child_model in model["children"]: - if child_model["nodeid"]==model["yes"]: - path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) - if child_model["nodeid"]==model["no"]: - path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) - else: - # print(path, conditions) - path['path '+str(num)] = conditions - path['path '+str(num)]['leaf'] = model["leaf"] - # leaf_info['tree '+str(tree_index)] += [model["leaf"]] - leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] - if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] - elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] - num += 1 - return path, num, leaf_info - - -def find_feature_split(model, tree_index, num_features, feature_names): - count_layer = 0 - count_route = 0 - count_list = 0 - layer = {} - route = {} - layer[count_layer] = {} - layer[count_layer][count_list] = {} - layer[count_layer][count_list]["lst"] = [0] - layer[count_layer][count_list]["tab"] = model - feature_split = {} - num_features = len(feature_names) - - for i in range(num_features): - feature_split["feature " + str(i)] = [] - while True: - if len(layer[count_layer].keys()) == 0: - break - layer[count_layer + 1] = {} - count_list = 0 - for list_id in layer[count_layer]: - feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ - layer[count_layer][list_id]["tab"]["split_condition"]] - # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range - - for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): - if "children" not in children.keys(): - route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] - count_route += 1 - else: - layer[count_layer + 1][count_list] = {} - layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ - children["nodeid"]] - layer[count_layer + 1][count_list]["tab"] = children - count_list += 1 - count_layer += 1 - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - -def path_to_path_to_leaf(path, num_features, table, leaf_code_list): - path_to_leaf ={} - for p in path: - path_to_leaf[p] = {} - path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) - for f in range(num_features): - ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] - end = table['feature '+str(f)][path[p]['f'+str(f)][1]] - path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() - return path_to_leaf - - -def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): - conditions = {} - for i in range(num_features): - conditions["f" + str(i)] = [0, feature_max[i]] - feature_split["feature " + str(i)] += [feature_max[i]] - - path = {} - path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) - leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) - path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) - return path_to_leaf, leaf_info - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): - - feature_split = find_feature_split(model, tree_index, num_features, feature_names) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - leaf_info['tree '+str(tree_index)] = [] - path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) - - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = np.ceil(math.log( - g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table, leaf_info - - -def ten_to_bin(num,count): - num = bin(int(num)).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def MaxMin_Norm_with_range(x, min , max, ranges = 10): - """[0,1] normaliaztion""" - x = (x - min) / (max - min) - return np.floor(ranges*x) - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # XGBoost - - data_train = xgb.DMatrix(train_X, label=train_y) - data_test = xgb.DMatrix(test_X, label=test_y) - watchlist = [(data_test, 'eval'), (data_train, 'train')] - param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} - bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) - - # param = {'max_depth': 8, 'num_class': 2} - # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) - bst.dump_model("src/temp/tree.txt") - sklearn_y_predict = bst.predict(data_test) - - result = classification_report(test_y, sklearn_y_predict) - # exit() - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ("num_feature: " + str(num_features)) not in log_dict: - log_dict["num_feature: " + str(num_features)] = {} - if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} - if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ - "num_tree: " + str(num_trees)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ - "num_depth: " + str(num_depth)] = {} - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "classification_report"] = result - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "max number of leaf nodes"] = max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print('Classification results are downloaded to log as', log_file) - - - the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") - xgb_model = {} - for i, m in enumerate(the_model): - xgb_model[i] = json.loads(m) - - - - g_table = {} - leaf_info ={} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx in xgb_model: - estimator = xgb_model[idx] - g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) - - - - def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for t in range(num_trees): - vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) - tree_num -= 1 - return g_table, num - - - ranges = 10 - g_table['votes to class'] = {} - print("\nGenerating vote to class table...",end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) - print('Done') - - feature_width = [] - for maxs in feature_max: - feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' + str(i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(Ternary_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = np.zeros(num_classes).tolist() - for idx in Exact_Table['decision']: - collect_class[Exact_Table['decision'][idx]['class']] += 1 - default_class = collect_class.index(max(collect_class)) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('Ternary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "XGB" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn import tree +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import classification_report +import xgboost as xgb +import copy +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import math +import time +import re +import json + +from sklearn.metrics import * + + + +def map(value): + value = value + return value + +def get_path(model, conditions, path, num, leaf_info, tree_index): + if 'children' in model.keys(): + conditions_yes = copy.deepcopy(conditions) + conditions_no = copy.deepcopy(conditions) + if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: + conditions_yes[model["split"]][1] = map(model["split_condition"])-1 + if conditions_no[model["split"]][0] < map(model["split_condition"]) : + conditions_no[model["split"]][0] = map(model["split_condition"]) + for child_model in model["children"]: + if child_model["nodeid"]==model["yes"]: + path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) + if child_model["nodeid"]==model["no"]: + path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) + else: + # print(path, conditions) + path['path '+str(num)] = conditions + path['path '+str(num)]['leaf'] = model["leaf"] + # leaf_info['tree '+str(tree_index)] += [model["leaf"]] + leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] + if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] + elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] + num += 1 + return path, num, leaf_info + + +def find_feature_split(model, tree_index, num_features, feature_names): + count_layer = 0 + count_route = 0 + count_list = 0 + layer = {} + route = {} + layer[count_layer] = {} + layer[count_layer][count_list] = {} + layer[count_layer][count_list]["lst"] = [0] + layer[count_layer][count_list]["tab"] = model + feature_split = {} + num_features = len(feature_names) + + for i in range(num_features): + feature_split["feature " + str(i)] = [] + while True: + if len(layer[count_layer].keys()) == 0: + break + layer[count_layer + 1] = {} + count_list = 0 + for list_id in layer[count_layer]: + feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ + layer[count_layer][list_id]["tab"]["split_condition"]] + # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range + + for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): + if "children" not in children.keys(): + route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] + count_route += 1 + else: + layer[count_layer + 1][count_list] = {} + layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ + children["nodeid"]] + layer[count_layer + 1][count_list]["tab"] = children + count_list += 1 + count_layer += 1 + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + +def path_to_path_to_leaf(path, num_features, table, leaf_code_list): + path_to_leaf ={} + for p in path: + path_to_leaf[p] = {} + path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) + for f in range(num_features): + ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] + end = table['feature '+str(f)][path[p]['f'+str(f)][1]] + path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() + return path_to_leaf + + +def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): + conditions = {} + for i in range(num_features): + conditions["f" + str(i)] = [0, feature_max[i]] + feature_split["feature " + str(i)] += [feature_max[i]] + + path = {} + path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) + leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) + path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) + return path_to_leaf, leaf_info + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): + + feature_split = find_feature_split(model, tree_index, num_features, feature_names) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + leaf_info['tree '+str(tree_index)] = [] + path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) + + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = np.ceil(math.log( + g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table, leaf_info + + +def ten_to_bin(num,count): + num = bin(int(num)).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def MaxMin_Norm_with_range(x, min , max, ranges = 10): + """[0,1] normaliaztion""" + x = (x - min) / (max - min) + return np.floor(ranges*x) + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # XGBoost + + data_train = xgb.DMatrix(train_X, label=train_y) + data_test = xgb.DMatrix(test_X, label=test_y) + watchlist = [(data_test, 'eval'), (data_train, 'train')] + param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} + bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) + + # param = {'max_depth': 8, 'num_class': 2} + # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) + bst.dump_model("src/temp/tree.txt") + sklearn_y_predict = bst.predict(data_test) + + result = classification_report(test_y, sklearn_y_predict) + # exit() + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ("num_feature: " + str(num_features)) not in log_dict: + log_dict["num_feature: " + str(num_features)] = {} + if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} + if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ + "num_tree: " + str(num_trees)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ + "num_depth: " + str(num_depth)] = {} + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "classification_report"] = result + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "max number of leaf nodes"] = max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print('Classification results are downloaded to log as', log_file) + + + the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") + xgb_model = {} + for i, m in enumerate(the_model): + xgb_model[i] = json.loads(m) + + + + g_table = {} + leaf_info ={} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx in xgb_model: + estimator = xgb_model[idx] + g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) + + + + def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for t in range(num_trees): + vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) + tree_num -= 1 + return g_table, num + + + ranges = 10 + g_table['votes to class'] = {} + print("\nGenerating vote to class table...",end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) + print('Done') + + feature_width = [] + for maxs in feature_max: + feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' + str(i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(Ternary_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = np.zeros(num_classes).tolist() + for idx in Exact_Table['decision']: + collect_class[Exact_Table['decision'][idx]['class']] += 1 + default_class = collect_class.index(max(collect_class)) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('Ternary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "XGB" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/XGB/Type_2/dedicated_p4.py b/src/models/XGB/Type_2/dedicated_p4.py index afe247a..5e23637 100755 --- a/src/models/XGB/Type_2/dedicated_p4.py +++ b/src/models/XGB/Type_2/dedicated_p4.py @@ -1,319 +1,319 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+"."+ - str(((1<<8)-1)&(key))+ "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+"."+ + str(((1<<8)-1)&(key))+ "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/XGB/Type_2/readme.md b/src/models/XGB/Type_2/readme.md index b91f444..50d6d54 100644 --- a/src/models/XGB/Type_2/readme.md +++ b/src/models/XGB/Type_2/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_2/table_generator.py b/src/models/XGB/Type_2/table_generator.py index a641a80..979a96e 100755 --- a/src/models/XGB/Type_2/table_generator.py +++ b/src/models/XGB/Type_2/table_generator.py @@ -1,551 +1,551 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn import tree -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import classification_report -import xgboost as xgb -import copy -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -import math -import time -import re -import json - -from sklearn.metrics import * - - - -def map(value): - value = value - return value - -def get_path(model, conditions, path, num, leaf_info, tree_index): - if 'children' in model.keys(): - conditions_yes = copy.deepcopy(conditions) - conditions_no = copy.deepcopy(conditions) - if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: - conditions_yes[model["split"]][1] = map(model["split_condition"])-1 - if conditions_no[model["split"]][0] < map(model["split_condition"]) : - conditions_no[model["split"]][0] = map(model["split_condition"]) - for child_model in model["children"]: - if child_model["nodeid"]==model["yes"]: - path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) - if child_model["nodeid"]==model["no"]: - path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) - else: - # print(path, conditions) - path['path '+str(num)] = conditions - path['path '+str(num)]['leaf'] = model["leaf"] - # leaf_info['tree '+str(tree_index)] += [model["leaf"]] - leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] - if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] - elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] - num += 1 - return path, num, leaf_info - - -def find_feature_split(model, tree_index, num_features, feature_names): - count_layer = 0 - count_route = 0 - count_list = 0 - layer = {} - route = {} - layer[count_layer] = {} - layer[count_layer][count_list] = {} - layer[count_layer][count_list]["lst"] = [0] - layer[count_layer][count_list]["tab"] = model - feature_split = {} - num_features = len(feature_names) - - for i in range(num_features): - feature_split["feature " + str(i)] = [] - while True: - if len(layer[count_layer].keys()) == 0: - break - layer[count_layer + 1] = {} - count_list = 0 - for list_id in layer[count_layer]: - feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ - layer[count_layer][list_id]["tab"]["split_condition"]] - # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range - - for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): - if "children" not in children.keys(): - route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] - count_route += 1 - else: - layer[count_layer + 1][count_list] = {} - layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ - children["nodeid"]] - layer[count_layer + 1][count_list]["tab"] = children - count_list += 1 - count_layer += 1 - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - -def path_to_path_to_leaf(path, num_features, table, leaf_code_list): - path_to_leaf ={} - for p in path: - path_to_leaf[p] = {} - path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) - for f in range(num_features): - ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] - end = table['feature '+str(f)][path[p]['f'+str(f)][1]] - path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() - return path_to_leaf - - -def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): - conditions = {} - for i in range(num_features): - conditions["f" + str(i)] = [0, feature_max[i]] - feature_split["feature " + str(i)] += [feature_max[i]] - - path = {} - path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) - leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) - path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) - return path_to_leaf, leaf_info - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): - - feature_split = find_feature_split(model, tree_index, num_features, feature_names) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - leaf_info['tree '+str(tree_index)] = [] - path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) - - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = np.ceil(math.log( - g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table, leaf_info - - -def ten_to_bin(num,count): - num = bin(int(num)).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def MaxMin_Norm_with_range(x, min , max, ranges = 10): - """[0,1] normaliaztion""" - x = (x - min) / (max - min) - return np.floor(ranges*x) - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # XGBoost - - data_train = xgb.DMatrix(train_X, label=train_y) - data_test = xgb.DMatrix(test_X, label=test_y) - watchlist = [(data_test, 'eval'), (data_train, 'train')] - param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} - bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) - - bst.dump_model("src/temp/tree.txt") - sklearn_y_predict = bst.predict(data_test) - - result = classification_report(test_y, sklearn_y_predict) - # exit() - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ("num_feature: " + str(num_features)) not in log_dict: - log_dict["num_feature: " + str(num_features)] = {} - if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} - if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ - "num_tree: " + str(num_trees)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ - "num_depth: " + str(num_depth)] = {} - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "classification_report"] = result - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "max number of leaf nodes"] = max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print('Classification results are downloaded to log as', log_file) - - - the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") - xgb_model = {} - for i, m in enumerate(the_model): - xgb_model[i] = json.loads(m) - - - - g_table = {} - leaf_info ={} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx in xgb_model: - estimator = xgb_model[idx] - g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) - - - - def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for t in range(num_trees): - vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) - tree_num -= 1 - return g_table, num - - - ranges = 10 - g_table['votes to class'] = {} - print("\nGenerating vote to class table...",end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) - print('Done') - - feature_width = [] - for maxs in feature_max: - feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) - - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = np.zeros(num_classes).tolist() - for idx in Exact_Table['decision']: - collect_class[Exact_Table['decision'][idx]['class']] += 1 - default_class = collect_class.index(max(collect_class)) - - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) - print('LPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "XGB" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list)!=str(lpm_code_list): - print('error in exact to ternary match', code_list,lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn import tree +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import classification_report +import xgboost as xgb +import copy +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +import math +import time +import re +import json + +from sklearn.metrics import * + + + +def map(value): + value = value + return value + +def get_path(model, conditions, path, num, leaf_info, tree_index): + if 'children' in model.keys(): + conditions_yes = copy.deepcopy(conditions) + conditions_no = copy.deepcopy(conditions) + if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: + conditions_yes[model["split"]][1] = map(model["split_condition"])-1 + if conditions_no[model["split"]][0] < map(model["split_condition"]) : + conditions_no[model["split"]][0] = map(model["split_condition"]) + for child_model in model["children"]: + if child_model["nodeid"]==model["yes"]: + path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) + if child_model["nodeid"]==model["no"]: + path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) + else: + # print(path, conditions) + path['path '+str(num)] = conditions + path['path '+str(num)]['leaf'] = model["leaf"] + # leaf_info['tree '+str(tree_index)] += [model["leaf"]] + leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] + if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] + elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] + num += 1 + return path, num, leaf_info + + +def find_feature_split(model, tree_index, num_features, feature_names): + count_layer = 0 + count_route = 0 + count_list = 0 + layer = {} + route = {} + layer[count_layer] = {} + layer[count_layer][count_list] = {} + layer[count_layer][count_list]["lst"] = [0] + layer[count_layer][count_list]["tab"] = model + feature_split = {} + num_features = len(feature_names) + + for i in range(num_features): + feature_split["feature " + str(i)] = [] + while True: + if len(layer[count_layer].keys()) == 0: + break + layer[count_layer + 1] = {} + count_list = 0 + for list_id in layer[count_layer]: + feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ + layer[count_layer][list_id]["tab"]["split_condition"]] + # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range + + for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): + if "children" not in children.keys(): + route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] + count_route += 1 + else: + layer[count_layer + 1][count_list] = {} + layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ + children["nodeid"]] + layer[count_layer + 1][count_list]["tab"] = children + count_list += 1 + count_layer += 1 + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + +def path_to_path_to_leaf(path, num_features, table, leaf_code_list): + path_to_leaf ={} + for p in path: + path_to_leaf[p] = {} + path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) + for f in range(num_features): + ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] + end = table['feature '+str(f)][path[p]['f'+str(f)][1]] + path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() + return path_to_leaf + + +def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): + conditions = {} + for i in range(num_features): + conditions["f" + str(i)] = [0, feature_max[i]] + feature_split["feature " + str(i)] += [feature_max[i]] + + path = {} + path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) + leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) + path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) + return path_to_leaf, leaf_info + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): + + feature_split = find_feature_split(model, tree_index, num_features, feature_names) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + leaf_info['tree '+str(tree_index)] = [] + path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) + + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = np.ceil(math.log( + g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table, leaf_info + + +def ten_to_bin(num,count): + num = bin(int(num)).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def MaxMin_Norm_with_range(x, min , max, ranges = 10): + """[0,1] normaliaztion""" + x = (x - min) / (max - min) + return np.floor(ranges*x) + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # XGBoost + + data_train = xgb.DMatrix(train_X, label=train_y) + data_test = xgb.DMatrix(test_X, label=test_y) + watchlist = [(data_test, 'eval'), (data_train, 'train')] + param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} + bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) + + bst.dump_model("src/temp/tree.txt") + sklearn_y_predict = bst.predict(data_test) + + result = classification_report(test_y, sklearn_y_predict) + # exit() + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ("num_feature: " + str(num_features)) not in log_dict: + log_dict["num_feature: " + str(num_features)] = {} + if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} + if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ + "num_tree: " + str(num_trees)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ + "num_depth: " + str(num_depth)] = {} + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "classification_report"] = result + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "max number of leaf nodes"] = max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print('Classification results are downloaded to log as', log_file) + + + the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") + xgb_model = {} + for i, m in enumerate(the_model): + xgb_model[i] = json.loads(m) + + + + g_table = {} + leaf_info ={} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx in xgb_model: + estimator = xgb_model[idx] + g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) + + + + def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for t in range(num_trees): + vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) + tree_num -= 1 + return g_table, num + + + ranges = 10 + g_table['votes to class'] = {} + print("\nGenerating vote to class table...",end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) + print('Done') + + feature_width = [] + for maxs in feature_max: + feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) + + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = np.zeros(num_classes).tolist() + for idx in Exact_Table['decision']: + collect_class[Exact_Table['decision'][idx]['class']] += 1 + default_class = collect_class.index(max(collect_class)) + + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) + print('LPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "XGB" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list)!=str(lpm_code_list): + print('error in exact to ternary match', code_list,lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/XGB/Type_2_xsa/dedicated_p4.py b/src/models/XGB/Type_2_xsa/dedicated_p4.py index 19712c8..9af5701 100755 --- a/src/models/XGB/Type_2_xsa/dedicated_p4.py +++ b/src/models/XGB/Type_2_xsa/dedicated_p4.py @@ -1,427 +1,427 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config, xsa_architecture=False): - if xsa_architecture: - return - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - - -def separate_variables(fname, config): - with open(fname, 'a') as processing: - for i in range(0, config['num_features']): - processing.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - processing.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - processing.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - processing.write(" bit<7> tree_" + str(t) + "_prob;\n") - processing.write(" bit<32> DstAddr;\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config, xsa_architecture=False): - if xsa_architecture: - min_code_width = 10 - else: - min_code_width = 0 - - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - if not xsa_architecture: - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n") - if xsa_architecture: - ingress.write(" key = { hdr.Planter.feature" + str(i) + ":lpm; }\n") - else: - ingress.write(" key = { meta.feature" + str(i) + ":lpm; }\n") - ingress.write(" actions = {\n") - if xsa_architecture: - ingress.write(" extract_feature" + str(i) + "(code_f" + str(i) + ");\n") - else: - ingress.write(" extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n") - ingress.write(" NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n") - if xsa_architecture: - ingress.write( - " tree_" + str(i) + "_prob" + " = prob;\n" - " tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - else: - ingress.write( - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n") - if xsa_architecture: - ingress.write(" tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n") - else: - ingress.write(" meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n") - ingress.write(" }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - if not xsa_architecture: - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - if xsa_architecture: - key_len = 1 + int(count_code[j] + config['code_width'][i][j] - 1) - int(count_code[j]) - ingress.write( - "(bit<" + str(max(key_len, min_code_width)) + ">) code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact @name(\"lookup_leaf_id" + str(i) + "\");\n ") - else: - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n") - if xsa_architecture: - ingress.write(" hdr.Planter.result = label;\n") - else: - ingress.write(" meta.result = label;\n") - ingress.write(" }\n\n") - ingress.write(" action write_default_decision() {\n") - if xsa_architecture: - ingress.write(" hdr.Planter.result = " + str(config['default label']) + ";\n") - else: - ingress.write(" meta.result = " + str(config['default label']) + ";\n") - ingress.write(" }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - if xsa_architecture: - ingress.write("tree_" + str(t) + "_vote:exact;\n ") - else: - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+"."+ - str(((1<<8)-1)&(key))+ "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - -def create_tables_Commend_esnet(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - # priority = int(idx) - priority = len(LPM_Table['feature ' + str(f)]) - int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - - - file.write("table_add lookup_feature" + str(f) + " extract_feature" + str(f) + - " " + hex(key) + "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + hex(label) + " " + str(priority) + "\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - -def edit_tables_command_esnet_software(fname): - with open(fname, 'a') as file: - file.write( - "# run traffic\n" - "run_traffic packets\n\n" - "# end\n" - "exit\n") - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file_esnet_hardware = work_root + "/src/targets/alveo_u280/hardware/s1-commands.txt" - create_tables_Commend_esnet(commend_file_esnet_hardware, Planter_config) - - commend_file_esnet_software = work_root + "/src/targets/alveo_u280/behavioral/test_environment/sim/test-case0/runsim.txt" - create_tables_Commend_esnet(commend_file_esnet_software, Planter_config) - edit_tables_command_esnet_software(commend_file_esnet_software) - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config, xsa_architecture=False): + if xsa_architecture: + return + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + + +def separate_variables(fname, config): + with open(fname, 'a') as processing: + for i in range(0, config['num_features']): + processing.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + processing.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + processing.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + processing.write(" bit<7> tree_" + str(t) + "_prob;\n") + processing.write(" bit<32> DstAddr;\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config, xsa_architecture=False): + if xsa_architecture: + min_code_width = 10 + else: + min_code_width = 0 + + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + if not xsa_architecture: + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n") + if xsa_architecture: + ingress.write(" key = { hdr.Planter.feature" + str(i) + ":lpm; }\n") + else: + ingress.write(" key = { meta.feature" + str(i) + ":lpm; }\n") + ingress.write(" actions = {\n") + if xsa_architecture: + ingress.write(" extract_feature" + str(i) + "(code_f" + str(i) + ");\n") + else: + ingress.write(" extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n") + ingress.write(" NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n") + if xsa_architecture: + ingress.write( + " tree_" + str(i) + "_prob" + " = prob;\n" + " tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + else: + ingress.write( + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n") + if xsa_architecture: + ingress.write(" tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n") + else: + ingress.write(" meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n") + ingress.write(" }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + if not xsa_architecture: + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + if xsa_architecture: + key_len = 1 + int(count_code[j] + config['code_width'][i][j] - 1) - int(count_code[j]) + ingress.write( + "(bit<" + str(max(key_len, min_code_width)) + ">) code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact @name(\"lookup_leaf_id" + str(i) + "\");\n ") + else: + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n") + if xsa_architecture: + ingress.write(" hdr.Planter.result = label;\n") + else: + ingress.write(" meta.result = label;\n") + ingress.write(" }\n\n") + ingress.write(" action write_default_decision() {\n") + if xsa_architecture: + ingress.write(" hdr.Planter.result = " + str(config['default label']) + ";\n") + else: + ingress.write(" meta.result = " + str(config['default label']) + ";\n") + ingress.write(" }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + if xsa_architecture: + ingress.write("tree_" + str(t) + "_vote:exact;\n ") + else: + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+"."+ + str(((1<<8)-1)&(key))+ "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + +def create_tables_Commend_esnet(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + # priority = int(idx) + priority = len(LPM_Table['feature ' + str(f)]) - int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + + + file.write("table_add lookup_feature" + str(f) + " extract_feature" + str(f) + + " " + hex(key) + "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + hex(label) + " " + str(priority) + "\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + +def edit_tables_command_esnet_software(fname): + with open(fname, 'a') as file: + file.write( + "# run traffic\n" + "run_traffic packets\n\n" + "# end\n" + "exit\n") + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file_esnet_hardware = work_root + "/src/targets/alveo_u280/hardware/s1-commands.txt" + create_tables_Commend_esnet(commend_file_esnet_hardware, Planter_config) + + commend_file_esnet_software = work_root + "/src/targets/alveo_u280/behavioral/test_environment/sim/test-case0/runsim.txt" + create_tables_Commend_esnet(commend_file_esnet_software, Planter_config) + edit_tables_command_esnet_software(commend_file_esnet_software) + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/XGB/Type_2_xsa/readme.md b/src/models/XGB/Type_2_xsa/readme.md index b91f444..50d6d54 100644 --- a/src/models/XGB/Type_2_xsa/readme.md +++ b/src/models/XGB/Type_2_xsa/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_2_xsa/table_generator.py b/src/models/XGB/Type_2_xsa/table_generator.py index 812e591..4b184c4 100755 --- a/src/models/XGB/Type_2_xsa/table_generator.py +++ b/src/models/XGB/Type_2_xsa/table_generator.py @@ -1,556 +1,556 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn import tree -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import classification_report -import xgboost as xgb -import copy -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -import math -import time -import re -import json - -from sklearn.metrics import * - - - -def map(value): - value = value - return value - -def get_path(model, conditions, path, num, leaf_info, tree_index): - if 'children' in model.keys(): - conditions_yes = copy.deepcopy(conditions) - conditions_no = copy.deepcopy(conditions) - if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: - conditions_yes[model["split"]][1] = map(model["split_condition"])-1 - if conditions_no[model["split"]][0] < map(model["split_condition"]) : - conditions_no[model["split"]][0] = map(model["split_condition"]) - for child_model in model["children"]: - if child_model["nodeid"]==model["yes"]: - path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) - if child_model["nodeid"]==model["no"]: - path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) - else: - # print(path, conditions) - path['path '+str(num)] = conditions - path['path '+str(num)]['leaf'] = model["leaf"] - # leaf_info['tree '+str(tree_index)] += [model["leaf"]] - leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] - if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] - elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] - num += 1 - return path, num, leaf_info - - -def find_feature_split(model, tree_index, num_features, feature_names): - count_layer = 0 - count_route = 0 - count_list = 0 - layer = {} - route = {} - layer[count_layer] = {} - layer[count_layer][count_list] = {} - layer[count_layer][count_list]["lst"] = [0] - layer[count_layer][count_list]["tab"] = model - feature_split = {} - num_features = len(feature_names) - - for i in range(num_features): - feature_split["feature " + str(i)] = [] - while True: - if len(layer[count_layer].keys()) == 0: - break - layer[count_layer + 1] = {} - count_list = 0 - for list_id in layer[count_layer]: - feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ - layer[count_layer][list_id]["tab"]["split_condition"]] - # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range - - for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): - if "children" not in children.keys(): - route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] - count_route += 1 - else: - layer[count_layer + 1][count_list] = {} - layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ - children["nodeid"]] - layer[count_layer + 1][count_list]["tab"] = children - count_list += 1 - count_layer += 1 - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - -def path_to_path_to_leaf(path, num_features, table, leaf_code_list): - path_to_leaf ={} - for p in path: - path_to_leaf[p] = {} - # path_to_leaf[p]['leaf'] = path[p]['leaf'] - path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) - # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) - for f in range(num_features): - ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] - end = table['feature '+str(f)][path[p]['f'+str(f)][1]] - path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() - return path_to_leaf - - -def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): - conditions = {} - for i in range(num_features): - conditions["f" + str(i)] = [0, feature_max[i]] - feature_split["feature " + str(i)] += [feature_max[i]] - - path = {} - path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) - leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) - path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) - return path_to_leaf, leaf_info - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): - - feature_split = find_feature_split(model, tree_index, num_features, feature_names) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - leaf_info['tree '+str(tree_index)] = [] - path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) - - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = np.ceil(math.log( - g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table, leaf_info - - -def ten_to_bin(num,count): - num = bin(int(num)).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def MaxMin_Norm_with_range(x, min , max, ranges = 10): - """[0,1] normaliaztion""" - x = (x - min) / (max - min) - return np.floor(ranges*x) - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # XGBoost - - data_train = xgb.DMatrix(train_X, label=train_y) - data_test = xgb.DMatrix(test_X, label=test_y) - watchlist = [(data_test, 'eval'), (data_train, 'train')] - param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} - bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) - - - bst.dump_model("src/temp/tree.txt") - sklearn_y_predict = bst.predict(data_test) - - result = classification_report(test_y, sklearn_y_predict) - # exit() - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ("num_feature: " + str(num_features)) not in log_dict: - log_dict["num_feature: " + str(num_features)] = {} - if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} - if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ - "num_tree: " + str(num_trees)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ - "num_depth: " + str(num_depth)] = {} - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "classification_report"] = result - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "max number of leaf nodes"] = max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print('Classification results are downloaded to log as', log_file) - - - the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") - xgb_model = {} - for i, m in enumerate(the_model): - xgb_model[i] = json.loads(m) - - - - g_table = {} - # feature_names = test_X.columns.T.tolist() - leaf_info ={} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx in xgb_model: - estimator = xgb_model[idx] - g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) - - - - def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for t in range(num_trees): - vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) - tree_num -= 1 - return g_table, num - - - ranges = 10 - g_table['votes to class'] = {} - print("\nGenerating vote to class table...",end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) - print('Done') - - feature_width = [] - for maxs in feature_max: - feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) - - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - LPM_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = np.zeros(num_classes).tolist() - for idx in Exact_Table['decision']: - collect_class[Exact_Table['decision'][idx]['class']] += 1 - default_class = collect_class.index(max(collect_class)) - - code_table_size = 0 - LPM_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) - print('LPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "XGB" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list)!=str(lpm_code_list): - print('error in exact to ternary match', code_list,lpm_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn import tree +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import classification_report +import xgboost as xgb +import copy +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +import math +import time +import re +import json + +from sklearn.metrics import * + + + +def map(value): + value = value + return value + +def get_path(model, conditions, path, num, leaf_info, tree_index): + if 'children' in model.keys(): + conditions_yes = copy.deepcopy(conditions) + conditions_no = copy.deepcopy(conditions) + if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: + conditions_yes[model["split"]][1] = map(model["split_condition"])-1 + if conditions_no[model["split"]][0] < map(model["split_condition"]) : + conditions_no[model["split"]][0] = map(model["split_condition"]) + for child_model in model["children"]: + if child_model["nodeid"]==model["yes"]: + path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) + if child_model["nodeid"]==model["no"]: + path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) + else: + # print(path, conditions) + path['path '+str(num)] = conditions + path['path '+str(num)]['leaf'] = model["leaf"] + # leaf_info['tree '+str(tree_index)] += [model["leaf"]] + leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] + if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] + elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] + num += 1 + return path, num, leaf_info + + +def find_feature_split(model, tree_index, num_features, feature_names): + count_layer = 0 + count_route = 0 + count_list = 0 + layer = {} + route = {} + layer[count_layer] = {} + layer[count_layer][count_list] = {} + layer[count_layer][count_list]["lst"] = [0] + layer[count_layer][count_list]["tab"] = model + feature_split = {} + num_features = len(feature_names) + + for i in range(num_features): + feature_split["feature " + str(i)] = [] + while True: + if len(layer[count_layer].keys()) == 0: + break + layer[count_layer + 1] = {} + count_list = 0 + for list_id in layer[count_layer]: + feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ + layer[count_layer][list_id]["tab"]["split_condition"]] + # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range + + for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): + if "children" not in children.keys(): + route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] + count_route += 1 + else: + layer[count_layer + 1][count_list] = {} + layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ + children["nodeid"]] + layer[count_layer + 1][count_list]["tab"] = children + count_list += 1 + count_layer += 1 + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + +def path_to_path_to_leaf(path, num_features, table, leaf_code_list): + path_to_leaf ={} + for p in path: + path_to_leaf[p] = {} + # path_to_leaf[p]['leaf'] = path[p]['leaf'] + path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) + # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) + for f in range(num_features): + ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] + end = table['feature '+str(f)][path[p]['f'+str(f)][1]] + path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() + return path_to_leaf + + +def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): + conditions = {} + for i in range(num_features): + conditions["f" + str(i)] = [0, feature_max[i]] + feature_split["feature " + str(i)] += [feature_max[i]] + + path = {} + path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) + leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) + path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) + return path_to_leaf, leaf_info + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): + + feature_split = find_feature_split(model, tree_index, num_features, feature_names) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + leaf_info['tree '+str(tree_index)] = [] + path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) + + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = np.ceil(math.log( + g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table, leaf_info + + +def ten_to_bin(num,count): + num = bin(int(num)).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def MaxMin_Norm_with_range(x, min , max, ranges = 10): + """[0,1] normaliaztion""" + x = (x - min) / (max - min) + return np.floor(ranges*x) + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # XGBoost + + data_train = xgb.DMatrix(train_X, label=train_y) + data_test = xgb.DMatrix(test_X, label=test_y) + watchlist = [(data_test, 'eval'), (data_train, 'train')] + param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} + bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) + + + bst.dump_model("src/temp/tree.txt") + sklearn_y_predict = bst.predict(data_test) + + result = classification_report(test_y, sklearn_y_predict) + # exit() + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ("num_feature: " + str(num_features)) not in log_dict: + log_dict["num_feature: " + str(num_features)] = {} + if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} + if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ + "num_tree: " + str(num_trees)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ + "num_depth: " + str(num_depth)] = {} + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "classification_report"] = result + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "max number of leaf nodes"] = max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print('Classification results are downloaded to log as', log_file) + + + the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") + xgb_model = {} + for i, m in enumerate(the_model): + xgb_model[i] = json.loads(m) + + + + g_table = {} + # feature_names = test_X.columns.T.tolist() + leaf_info ={} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx in xgb_model: + estimator = xgb_model[idx] + g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) + + + + def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for t in range(num_trees): + vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) + tree_num -= 1 + return g_table, num + + + ranges = 10 + g_table['votes to class'] = {} + print("\nGenerating vote to class table...",end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) + print('Done') + + feature_width = [] + for maxs in feature_max: + feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) + + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + LPM_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + LPM_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(LPM_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = np.zeros(num_classes).tolist() + for idx in Exact_Table['decision']: + collect_class[Exact_Table['decision'][idx]['class']] += 1 + default_class = collect_class.index(max(collect_class)) + + code_table_size = 0 + LPM_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + LPM_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) + print('LPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "XGB" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list)!=str(lpm_code_list): + print('error in exact to ternary match', code_list,lpm_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/XGB/Type_3/dedicated_p4.py b/src/models/XGB/Type_3/dedicated_p4.py index afe247a..5e23637 100755 --- a/src/models/XGB/Type_3/dedicated_p4.py +++ b/src/models/XGB/Type_3/dedicated_p4.py @@ -1,319 +1,319 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json -import math - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { meta.feature" + str(i) + ":lpm; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " meta.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " meta.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in LPM_Table['feature ' + str(f)]: - priority = int(idx) - key = LPM_Table['feature ' + str(f)][idx][1] - mask = LPM_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = LPM_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + - " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+"."+ - str(((1<<8)-1)&(key))+ "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + - " => " + str(label) + " \n") - - file.write("\n") - - - for t in range(num_trees): - for idx in LPM_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in LPM_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - "import math\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + - str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") - - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json +import math + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { meta.feature" + str(i) + ":lpm; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " meta.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " meta.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in LPM_Table['feature ' + str(f)]: + priority = int(idx) + key = LPM_Table['feature ' + str(f)][idx][1] + mask = LPM_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = LPM_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f) + " extract_feature" + str(f) + + " "+str(((1<<8)-1)&(key>>24))+"."+str(((1<<8)-1)&(key>>16))+"."+str(((1<<8)-1)&(key>>8))+"."+ + str(((1<<8)-1)&(key))+ "/" + str(32 - int(math.log(2 ** config['p4 config']["width of feature"][f] - mask, 2))) + + " => " + str(label) + " \n") + + file.write("\n") + + + for t in range(num_trees): + for idx in LPM_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(LPM_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(LPM_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in LPM_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(LPM_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(LPM_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + "import math\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], int(32-math.log(2**config['width of feature'][" + + str(i) + "]-table['feature " + str(i) + "'][key][0],2)), int(codes,2) )\n") + + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/XGB/Type_3/readme.md b/src/models/XGB/Type_3/readme.md index b91f444..50d6d54 100644 --- a/src/models/XGB/Type_3/readme.md +++ b/src/models/XGB/Type_3/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_3/table_generator.py b/src/models/XGB/Type_3/table_generator.py index 7ad8f97..f0a1d04 100755 --- a/src/models/XGB/Type_3/table_generator.py +++ b/src/models/XGB/Type_3/table_generator.py @@ -1,570 +1,570 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn import tree -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import classification_report -import xgboost as xgb -import copy -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.Range_to_LPM import * -from src.functions.json_encoder import * -from src.functions.Muti_Exact_to_LPM import * -import math -import time -import re -import json - -from sklearn.metrics import * - - - -def map(value): - value = value - return value - -def get_path(model, conditions, path, num, leaf_info, tree_index): - if 'children' in model.keys(): - conditions_yes = copy.deepcopy(conditions) - conditions_no = copy.deepcopy(conditions) - if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: - conditions_yes[model["split"]][1] = map(model["split_condition"])-1 - if conditions_no[model["split"]][0] < map(model["split_condition"]) : - conditions_no[model["split"]][0] = map(model["split_condition"]) - for child_model in model["children"]: - if child_model["nodeid"]==model["yes"]: - path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) - if child_model["nodeid"]==model["no"]: - path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) - else: - # print(path, conditions) - path['path '+str(num)] = conditions - path['path '+str(num)]['leaf'] = model["leaf"] - # leaf_info['tree '+str(tree_index)] += [model["leaf"]] - leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] - if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] - elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] - num += 1 - return path, num, leaf_info - - -def find_feature_split(model, tree_index, num_features, feature_names): - count_layer = 0 - count_route = 0 - count_list = 0 - layer = {} - route = {} - layer[count_layer] = {} - layer[count_layer][count_list] = {} - layer[count_layer][count_list]["lst"] = [0] - layer[count_layer][count_list]["tab"] = model - feature_split = {} - num_features = len(feature_names) - - for i in range(num_features): - feature_split["feature " + str(i)] = [] - while True: - if len(layer[count_layer].keys()) == 0: - break - layer[count_layer + 1] = {} - count_list = 0 - for list_id in layer[count_layer]: - feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ - layer[count_layer][list_id]["tab"]["split_condition"]] - # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range - - for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): - if "children" not in children.keys(): - route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] - count_route += 1 - else: - layer[count_layer + 1][count_list] = {} - layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ - children["nodeid"]] - layer[count_layer + 1][count_list]["tab"] = children - count_list += 1 - count_layer += 1 - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - -def path_to_path_to_leaf(path, num_features, table, leaf_code_list): - path_to_leaf ={} - for p in path: - path_to_leaf[p] = {} - # path_to_leaf[p]['leaf'] = path[p]['leaf'] - path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) - # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) - for f in range(num_features): - ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] - end = table['feature '+str(f)][path[p]['f'+str(f)][1]] - path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() - return path_to_leaf - - -def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): - conditions = {} - for i in range(num_features): - conditions["f" + str(i)] = [0, feature_max[i]] - feature_split["feature " + str(i)] += [feature_max[i]] - - path = {} - path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) - leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) - path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) - return path_to_leaf, leaf_info - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): - - feature_split = find_feature_split(model, tree_index, num_features, feature_names) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - leaf_info['tree '+str(tree_index)] = [] - path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) - - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = np.ceil(math.log( - g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table, leaf_info - - -def ten_to_bin(num,count): - num = bin(int(num)).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def MaxMin_Norm_with_range(x, min , max, ranges = 10): - """[0,1] normaliaztion""" - x = (x - min) / (max - min) - return np.floor(ranges*x) - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [np.max(t_t)+1] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # XGBoost - - data_train = xgb.DMatrix(train_X, label=train_y) - data_test = xgb.DMatrix(test_X, label=test_y) - watchlist = [(data_test, 'eval'), (data_train, 'train')] - param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} - bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) - - # param = {'max_depth': 8, 'num_class': 2} - # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) - bst.dump_model("src/temp/tree.txt") - sklearn_y_predict = bst.predict(data_test) - - result = classification_report(test_y, sklearn_y_predict) - # exit() - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ("num_feature: " + str(num_features)) not in log_dict: - log_dict["num_feature: " + str(num_features)] = {} - if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} - if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ - "num_tree: " + str(num_trees)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ - "num_depth: " + str(num_depth)] = {} - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "classification_report"] = result - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "max number of leaf nodes"] = max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print('Classification results are downloaded to log as', log_file) - - - the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") - xgb_model = {} - for i, m in enumerate(the_model): - xgb_model[i] = json.loads(m) - - - - g_table = {} - # feature_names = test_X.columns.T.tolist() - leaf_info ={} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx in xgb_model: - estimator = xgb_model[idx] - g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) - - - - def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for t in range(num_trees): - vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) - tree_num -= 1 - return g_table, num - - - ranges = 10 - g_table['votes to class'] = {} - print("\nGenerating vote to class table...",end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) - print('Done') - - feature_width = [] - for maxs in feature_max: - feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) - - - - LPM_Table = {} - LPM_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - LPM_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - LPM_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(LPM_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' + str(i)) - LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) - - - # ===================== tree table to LPM ========================= - default_vote = 0 - - for t in range(num_trees): - LPM_Table['tree '+str(t)] = {} - print('') - print('Begine transfer: Tree '+str(t)+' table ') - key_name = [] - for f in range(num_features): - key_name += ['f' + str(f) + ' code'] - action_name = 'leaf' - # prepare default - LPM_Table['tree '+str(t)] = Muti_Exact_to_LPM_Concatination(Exact_Table['tree '+str(t)], code_width_tree_feature[t], key_name, action_name) - - # ===================== decision table to LPM ========================= - default_class = 0 - - LPM_Table['decision'] = {} - print('') - print('Begine transfer decision table ') - key_name = [] - for t in range(num_trees): - key_name += ['t' + str(t) + ' vote'] - action_name = 'class' - - decision_table_key_width = [] - for t in range(num_trees): - # decision_table_key_width += [5] - decision_table_key_width += [int(1+np.ceil(math.log(num_classes, 2)))] - - # prepare default - LPM_Table['decision'] = Muti_Exact_to_LPM_Concatination(Exact_Table['decision'], decision_table_key_width, key_name, action_name) - Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) - print('LPM_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "XGB" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'LPM_Table.json' - Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["width of decision table keys"] = decision_table_key_width - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - decision_table_key_width = Planter_config['p4 config']["width of decision table keys"] - LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - lpm_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - LPM_table = LPM_Table['feature ' + str(f)] - keys = list(LPM_table.keys()) - - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match - - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - - if str(code_list) != str(lpm_code_list): - print('error in exact to lpm match', code_list, lpm_code_list) - - binary_code = '' - for f in range(num_features): - binary_code = binary_code + ten_to_bin(int(code_list[f]), - int(Planter_config['p4 config']["width of code"][tree][f])) - decimal_code = int(binary_code, 2) - - LPM_table = LPM_Table['tree ' + str(tree)] - keys = list(LPM_table.keys()) - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - vote_list[tree] = action[max_index] - - binary_code = '' - for t in range(num_trees): - binary_code = binary_code + ten_to_bin(int(vote_list[t]), decision_table_key_width[t]) - decimal_code = int(binary_code, 2) - - LPM_table = LPM_Table['decision'] - keys = list(LPM_table.keys()) - mask = [] - action = [] - for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key - if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ - 1]: # if there is a ternary match - mask.append(LPM_table[count][0]) # array of masks - action.append(LPM_table[count][2]) # array of actions - max_mask = max(mask) - max_index = mask.index(max_mask) - switch_prediction = action[max_index] - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - print('Exit, the P4 generator (dedicate P4) is currently unavailable in this variation.') - exit() - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn import tree +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import classification_report +import xgboost as xgb +import copy +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.Range_to_LPM import * +from src.functions.json_encoder import * +from src.functions.Muti_Exact_to_LPM import * +import math +import time +import re +import json + +from sklearn.metrics import * + + + +def map(value): + value = value + return value + +def get_path(model, conditions, path, num, leaf_info, tree_index): + if 'children' in model.keys(): + conditions_yes = copy.deepcopy(conditions) + conditions_no = copy.deepcopy(conditions) + if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: + conditions_yes[model["split"]][1] = map(model["split_condition"])-1 + if conditions_no[model["split"]][0] < map(model["split_condition"]) : + conditions_no[model["split"]][0] = map(model["split_condition"]) + for child_model in model["children"]: + if child_model["nodeid"]==model["yes"]: + path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) + if child_model["nodeid"]==model["no"]: + path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) + else: + # print(path, conditions) + path['path '+str(num)] = conditions + path['path '+str(num)]['leaf'] = model["leaf"] + # leaf_info['tree '+str(tree_index)] += [model["leaf"]] + leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] + if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] + elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] + num += 1 + return path, num, leaf_info + + +def find_feature_split(model, tree_index, num_features, feature_names): + count_layer = 0 + count_route = 0 + count_list = 0 + layer = {} + route = {} + layer[count_layer] = {} + layer[count_layer][count_list] = {} + layer[count_layer][count_list]["lst"] = [0] + layer[count_layer][count_list]["tab"] = model + feature_split = {} + num_features = len(feature_names) + + for i in range(num_features): + feature_split["feature " + str(i)] = [] + while True: + if len(layer[count_layer].keys()) == 0: + break + layer[count_layer + 1] = {} + count_list = 0 + for list_id in layer[count_layer]: + feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ + layer[count_layer][list_id]["tab"]["split_condition"]] + # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range + + for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): + if "children" not in children.keys(): + route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] + count_route += 1 + else: + layer[count_layer + 1][count_list] = {} + layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ + children["nodeid"]] + layer[count_layer + 1][count_list]["tab"] = children + count_list += 1 + count_layer += 1 + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + +def path_to_path_to_leaf(path, num_features, table, leaf_code_list): + path_to_leaf ={} + for p in path: + path_to_leaf[p] = {} + # path_to_leaf[p]['leaf'] = path[p]['leaf'] + path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) + # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) + for f in range(num_features): + ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] + end = table['feature '+str(f)][path[p]['f'+str(f)][1]] + path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() + return path_to_leaf + + +def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): + conditions = {} + for i in range(num_features): + conditions["f" + str(i)] = [0, feature_max[i]] + feature_split["feature " + str(i)] += [feature_max[i]] + + path = {} + path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) + leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) + path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) + return path_to_leaf, leaf_info + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): + + feature_split = find_feature_split(model, tree_index, num_features, feature_names) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + leaf_info['tree '+str(tree_index)] = [] + path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) + + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = np.ceil(math.log( + g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table, leaf_info + + +def ten_to_bin(num,count): + num = bin(int(num)).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def MaxMin_Norm_with_range(x, min , max, ranges = 10): + """[0,1] normaliaztion""" + x = (x - min) / (max - min) + return np.floor(ranges*x) + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [np.max(t_t)+1] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # XGBoost + + data_train = xgb.DMatrix(train_X, label=train_y) + data_test = xgb.DMatrix(test_X, label=test_y) + watchlist = [(data_test, 'eval'), (data_train, 'train')] + param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} + bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) + + # param = {'max_depth': 8, 'num_class': 2} + # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) + bst.dump_model("src/temp/tree.txt") + sklearn_y_predict = bst.predict(data_test) + + result = classification_report(test_y, sklearn_y_predict) + # exit() + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ("num_feature: " + str(num_features)) not in log_dict: + log_dict["num_feature: " + str(num_features)] = {} + if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} + if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ + "num_tree: " + str(num_trees)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ + "num_depth: " + str(num_depth)] = {} + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "classification_report"] = result + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "max number of leaf nodes"] = max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print('Classification results are downloaded to log as', log_file) + + + the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") + xgb_model = {} + for i, m in enumerate(the_model): + xgb_model[i] = json.loads(m) + + + + g_table = {} + # feature_names = test_X.columns.T.tolist() + leaf_info ={} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx in xgb_model: + estimator = xgb_model[idx] + g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) + + + + def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for t in range(num_trees): + vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) + tree_num -= 1 + return g_table, num + + + ranges = 10 + g_table['votes to class'] = {} + print("\nGenerating vote to class table...",end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) + print('Done') + + feature_width = [] + for maxs in feature_max: + feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) + + + + LPM_Table = {} + LPM_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + LPM_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + LPM_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + LPM_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + LPM_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(LPM_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' + str(i)) + LPM_Table['feature '+str(i)]= Table_to_LPM(LPM_Table['feature '+str(i)], feature_width[i]) + + + # ===================== tree table to LPM ========================= + default_vote = 0 + + for t in range(num_trees): + LPM_Table['tree '+str(t)] = {} + print('') + print('Begine transfer: Tree '+str(t)+' table ') + key_name = [] + for f in range(num_features): + key_name += ['f' + str(f) + ' code'] + action_name = 'leaf' + # prepare default + LPM_Table['tree '+str(t)] = Muti_Exact_to_LPM_Concatination(Exact_Table['tree '+str(t)], code_width_tree_feature[t], key_name, action_name) + + # ===================== decision table to LPM ========================= + default_class = 0 + + LPM_Table['decision'] = {} + print('') + print('Begine transfer decision table ') + key_name = [] + for t in range(num_trees): + key_name += ['t' + str(t) + ' vote'] + action_name = 'class' + + decision_table_key_width = [] + for t in range(num_trees): + # decision_table_key_width += [5] + decision_table_key_width += [int(1+np.ceil(math.log(num_classes, 2)))] + + # prepare default + LPM_Table['decision'] = Muti_Exact_to_LPM_Concatination(Exact_Table['decision'], decision_table_key_width, key_name, action_name) + Exact_Table['decision'] = copy.deepcopy(LPM_Table['decision']) + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(LPM_Table, open('Tables/LPM_Table.json', 'w'), indent=4) + print('LPM_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "XGB" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'LPM_Table.json' + Planter_config['p4 config']["decision table size"] = len(LPM_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(LPM_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["width of decision table keys"] = decision_table_key_width + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(LPM_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + decision_table_key_width = Planter_config['p4 config']["width of decision table keys"] + LPM_Table = json.load(open('Tables/LPM_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + lpm_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + LPM_table = LPM_Table['feature ' + str(f)] + keys = list(LPM_table.keys()) + + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if input_feature_value[f] & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + lpm_code_list[f] = action[max_index][tree] # Choose the action with the longest prefix match + + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + + if str(code_list) != str(lpm_code_list): + print('error in exact to lpm match', code_list, lpm_code_list) + + binary_code = '' + for f in range(num_features): + binary_code = binary_code + ten_to_bin(int(code_list[f]), + int(Planter_config['p4 config']["width of code"][tree][f])) + decimal_code = int(binary_code, 2) + + LPM_table = LPM_Table['tree ' + str(tree)] + keys = list(LPM_table.keys()) + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + vote_list[tree] = action[max_index] + + binary_code = '' + for t in range(num_trees): + binary_code = binary_code + ten_to_bin(int(vote_list[t]), decision_table_key_width[t]) + decimal_code = int(binary_code, 2) + + LPM_table = LPM_Table['decision'] + keys = list(LPM_table.keys()) + mask = [] + action = [] + for count in np.sort(keys): # For each value in LPM table, check if it matches that separation key + if decimal_code & LPM_table[count][0] == LPM_table[count][0] & LPM_table[count][ + 1]: # if there is a ternary match + mask.append(LPM_table[count][0]) # array of masks + action.append(LPM_table[count][2]) # array of actions + max_mask = max(mask) + max_index = mask.index(max_mask) + switch_prediction = action[max_index] + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + print('Exit, the P4 generator (dedicate P4) is currently unavailable in this variation.') + exit() + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/XGB/Type_EB/dedicated_p4.py b/src/models/XGB/Type_EB/dedicated_p4.py index e38a615..337c217 100755 --- a/src/models/XGB/Type_EB/dedicated_p4.py +++ b/src/models/XGB/Type_EB/dedicated_p4.py @@ -1,314 +1,314 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/XGB/Type_EB/readme.md b/src/models/XGB/Type_EB/readme.md index b91f444..50d6d54 100644 --- a/src/models/XGB/Type_EB/readme.md +++ b/src/models/XGB/Type_EB/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_EB/table_generator.py b/src/models/XGB/Type_EB/table_generator.py index c5b4abf..c9d3d80 100755 --- a/src/models/XGB/Type_EB/table_generator.py +++ b/src/models/XGB/Type_EB/table_generator.py @@ -1,559 +1,559 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') - -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn import tree -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import classification_report -import xgboost as xgb -import copy -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -import math -import time -import re -import json - -from sklearn.metrics import * - - - -def map(value): - value = value - return value - -def get_path(model, conditions, path, num, leaf_info, tree_index): - if 'children' in model.keys(): - conditions_yes = copy.deepcopy(conditions) - conditions_no = copy.deepcopy(conditions) - if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: - conditions_yes[model["split"]][1] = map(model["split_condition"])-1 - if conditions_no[model["split"]][0] < map(model["split_condition"]) : - conditions_no[model["split"]][0] = map(model["split_condition"]) - for child_model in model["children"]: - if child_model["nodeid"]==model["yes"]: - path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) - if child_model["nodeid"]==model["no"]: - path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) - else: - # print(path, conditions) - path['path '+str(num)] = conditions - path['path '+str(num)]['leaf'] = model["leaf"] - # leaf_info['tree '+str(tree_index)] += [model["leaf"]] - leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] - if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] - elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] - num += 1 - return path, num, leaf_info - - -def find_feature_split(model, tree_index, num_features, feature_names): - count_layer = 0 - count_route = 0 - count_list = 0 - layer = {} - route = {} - layer[count_layer] = {} - layer[count_layer][count_list] = {} - layer[count_layer][count_list]["lst"] = [0] - layer[count_layer][count_list]["tab"] = model - feature_split = {} - num_features = len(feature_names) - - for i in range(num_features): - feature_split["feature " + str(i)] = [] - while True: - if len(layer[count_layer].keys()) == 0: - break - layer[count_layer + 1] = {} - count_list = 0 - for list_id in layer[count_layer]: - feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ - layer[count_layer][list_id]["tab"]["split_condition"]] - # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range - - for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): - if "children" not in children.keys(): - route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] - count_route += 1 - else: - layer[count_layer + 1][count_list] = {} - layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ - children["nodeid"]] - layer[count_layer + 1][count_list]["tab"] = children - count_list += 1 - count_layer += 1 - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(feature_max[i]+1): - if nife !=[] : - if len(nife) > count_code: - if j == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - -def path_to_path_to_leaf(path, num_features, table, leaf_code_list): - path_to_leaf ={} - for p in path: - path_to_leaf[p] = {} - # path_to_leaf[p]['leaf'] = path[p]['leaf'] - path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) - # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) - for f in range(num_features): - ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] - end = table['feature '+str(f)][path[p]['f'+str(f)][1]] - path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() - return path_to_leaf - - -def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): - conditions = {} - for i in range(num_features): - conditions["f" + str(i)] = [0, feature_max[i]] - feature_split["feature " + str(i)] += [feature_max[i]] - - path = {} - path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) - leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) - path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) - return path_to_leaf, leaf_info - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): - - feature_split = find_feature_split(model, tree_index, num_features, feature_names) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - leaf_info['tree '+str(tree_index)] = [] - path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) - - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = np.ceil(math.log( - g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table, leaf_info - - -def ten_to_bin(num,count): - num = bin(int(num)).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def MaxMin_Norm_with_range(x, min , max, ranges = 10): - """[0,1] normaliaztion""" - x = (x - min) / (max - min) - return np.floor(ranges*x) - -def run_model(train_X, train_y, test_X, test_y, used_features): - config_file = 'src/configs/Planter_config.json' - - Planter_config = json.load(open(config_file, 'r')) - - Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') - Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') - Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # XGBoost - - data_train = xgb.DMatrix(train_X, label=train_y) - data_test = xgb.DMatrix(test_X, label=test_y) - watchlist = [(data_test, 'eval'), (data_train, 'train')] - param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} - bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) - - # param = {'max_depth': 8, 'num_class': 2} - # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) - bst.dump_model("src/temp/tree.txt") - sklearn_y_predict = bst.predict(data_test) - - result = classification_report(test_y, sklearn_y_predict) - # exit() - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ("num_feature: " + str(num_features)) not in log_dict: - log_dict["num_feature: " + str(num_features)] = {} - if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} - if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ - "num_tree: " + str(num_trees)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ - "num_depth: " + str(num_depth)] = {} - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "classification_report"] = result - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "max number of leaf nodes"] = max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print('Classification results are downloaded to log as', log_file) - - - the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") - xgb_model = {} - for i, m in enumerate(the_model): - xgb_model[i] = json.loads(m) - - - - g_table = {} - # feature_names = test_X.columns.T.tolist() - leaf_info ={} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx in xgb_model: - estimator = xgb_model[idx] - g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) - - - - def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for t in range(num_trees): - vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) - tree_num -= 1 - return g_table, num - - - ranges = 10 - g_table['votes to class'] = {} - print("\nGenerating vote to class table...",end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) - print('Done') - - feature_width = [] - for maxs in feature_max: - feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' + str(i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(Ternary_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = np.zeros(num_classes).tolist() - for idx in Exact_Table['decision']: - collect_class[Exact_Table['decision'][idx]['class']] += 1 - default_class = collect_class.index(max(collect_class)) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('Ternary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "XGB" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree ' + str(tree)].keys())] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') + +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn import tree +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import classification_report +import xgboost as xgb +import copy +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +import math +import time +import re +import json + +from sklearn.metrics import * + + + +def map(value): + value = value + return value + +def get_path(model, conditions, path, num, leaf_info, tree_index): + if 'children' in model.keys(): + conditions_yes = copy.deepcopy(conditions) + conditions_no = copy.deepcopy(conditions) + if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: + conditions_yes[model["split"]][1] = map(model["split_condition"])-1 + if conditions_no[model["split"]][0] < map(model["split_condition"]) : + conditions_no[model["split"]][0] = map(model["split_condition"]) + for child_model in model["children"]: + if child_model["nodeid"]==model["yes"]: + path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) + if child_model["nodeid"]==model["no"]: + path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) + else: + # print(path, conditions) + path['path '+str(num)] = conditions + path['path '+str(num)]['leaf'] = model["leaf"] + # leaf_info['tree '+str(tree_index)] += [model["leaf"]] + leaf_info['tree ' + str(tree_index)] += [round(model["leaf"], 1)] + if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] + elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] + num += 1 + return path, num, leaf_info + + +def find_feature_split(model, tree_index, num_features, feature_names): + count_layer = 0 + count_route = 0 + count_list = 0 + layer = {} + route = {} + layer[count_layer] = {} + layer[count_layer][count_list] = {} + layer[count_layer][count_list]["lst"] = [0] + layer[count_layer][count_list]["tab"] = model + feature_split = {} + num_features = len(feature_names) + + for i in range(num_features): + feature_split["feature " + str(i)] = [] + while True: + if len(layer[count_layer].keys()) == 0: + break + layer[count_layer + 1] = {} + count_list = 0 + for list_id in layer[count_layer]: + feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ + layer[count_layer][list_id]["tab"]["split_condition"]] + # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range + + for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): + if "children" not in children.keys(): + route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] + count_route += 1 + else: + layer[count_layer + 1][count_list] = {} + layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ + children["nodeid"]] + layer[count_layer + 1][count_list]["tab"] = children + count_list += 1 + count_layer += 1 + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(feature_max[i]+1): + if nife !=[] : + if len(nife) > count_code: + if j == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + +def path_to_path_to_leaf(path, num_features, table, leaf_code_list): + path_to_leaf ={} + for p in path: + path_to_leaf[p] = {} + # path_to_leaf[p]['leaf'] = path[p]['leaf'] + path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf'], 1)) + # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) + for f in range(num_features): + ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] + end = table['feature '+str(f)][path[p]['f'+str(f)][1]] + path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() + return path_to_leaf + + +def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): + conditions = {} + for i in range(num_features): + conditions["f" + str(i)] = [0, feature_max[i]] + feature_split["feature " + str(i)] += [feature_max[i]] + + path = {} + path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) + leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) + path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) + return path_to_leaf, leaf_info + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): + + feature_split = find_feature_split(model, tree_index, num_features, feature_names) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + leaf_info['tree '+str(tree_index)] = [] + path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) + + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = np.ceil(math.log( + g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table, leaf_info + + +def ten_to_bin(num,count): + num = bin(int(num)).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def MaxMin_Norm_with_range(x, min , max, ranges = 10): + """[0,1] normaliaztion""" + x = (x - min) / (max - min) + return np.floor(ranges*x) + +def run_model(train_X, train_y, test_X, test_y, used_features): + config_file = 'src/configs/Planter_config.json' + + Planter_config = json.load(open(config_file, 'r')) + + Planter_config['model config']['number of trees'] = int(input('- Number of trees? (default = 6) ') or '6') + Planter_config['model config']['number of depth'] = int(input('- Number of depth? (default = 4) ') or '4') + Planter_config['model config']['max number of leaf nodes'] = int(input('- Number of leaf nodes? (default = 1000) ') or '1000') + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # XGBoost + + data_train = xgb.DMatrix(train_X, label=train_y) + data_test = xgb.DMatrix(test_X, label=test_y) + watchlist = [(data_test, 'eval'), (data_train, 'train')] + param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} + bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) + + # param = {'max_depth': 8, 'num_class': 2} + # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) + bst.dump_model("src/temp/tree.txt") + sklearn_y_predict = bst.predict(data_test) + + result = classification_report(test_y, sklearn_y_predict) + # exit() + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ("num_feature: " + str(num_features)) not in log_dict: + log_dict["num_feature: " + str(num_features)] = {} + if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} + if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ + "num_tree: " + str(num_trees)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ + "num_depth: " + str(num_depth)] = {} + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "classification_report"] = result + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "max number of leaf nodes"] = max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print('Classification results are downloaded to log as', log_file) + + + the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") + xgb_model = {} + for i, m in enumerate(the_model): + xgb_model[i] = json.loads(m) + + + + g_table = {} + # feature_names = test_X.columns.T.tolist() + leaf_info ={} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx in xgb_model: + estimator = xgb_model[idx] + g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) + + + + def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for t in range(num_trees): + vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) + tree_num -= 1 + return g_table, num + + + ranges = 10 + g_table['votes to class'] = {} + print("\nGenerating vote to class table...",end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) + print('Done') + + feature_width = [] + for maxs in feature_max: + feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' + str(i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(Ternary_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = np.zeros(num_classes).tolist() + for idx in Exact_Table['decision']: + collect_class[Exact_Table['decision'][idx]['class']] += 1 + default_class = collect_class.index(max(collect_class)) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('Ternary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "XGB" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys()) + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree ' + str(tree)].keys())] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + json.dump(Planter_config, open(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json', 'w'), indent=4, cls=NpEncoder) + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/XGB/Type_EB_auto/dedicated_p4.py b/src/models/XGB/Type_EB_auto/dedicated_p4.py index e38a615..337c217 100755 --- a/src/models/XGB/Type_EB_auto/dedicated_p4.py +++ b/src/models/XGB/Type_EB_auto/dedicated_p4.py @@ -1,314 +1,314 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is a P4 generator of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import json - - -def load_config(fname): - Planter_config = json.load(open('src/configs/' + fname, 'r')) - config_file = Planter_config['p4 config'] - config = {} - config['num_features'] = config_file["number of features"] - config['num_trees'] = config_file["number of trees"] - config['num_classes'] = config_file["number of classes"] - config['column_width'] = config_file['width of feature'] - config['result_width'] = config_file['width of result'] - config['code_width'] = config_file['width of code'] - config['feature_table_depth'] = config_file['used columns'] - config['headers_list'] = config_file['standard headers'] - config['code_tbl_depth'] = config_file['code table size'] - config["decision_table_size"] = config_file["decision table size"] - config['probability_width'] = config_file['width of probability'] - config['model'] = config_file['model'] - config['default label'] = config_file["default label"] - config['default_vote'] = config_file["default vote"] - return config, Planter_config - - -def add_model_intro(fname, config): - with open(fname, 'a') as intro: - intro.write("/*\n" - " * Planter\n" - " *\n" - " * This program implements a simple protocol. It can be carried over Ethernet\n" - " * (Ethertype 0x1234).\n" - " *\n" - " * The Protocol header looks like this:\n" - " *\n" - " * 0 1 2 3\n" - " * +----------------+----------------+----------------+---------------+\n" - " * | P | 4 | Version | Type |\n" - " * +----------------+----------------+----------------+---------------+\n") - for f in range(config['num_features']): - intro.write( " * | feature"+str(f)+" |\n" - " * +----------------+----------------+----------------+---------------+\n") - intro.write( " * | Result |\n" - " * +----------------+----------------+----------------+---------------+\n" - " *\n" - " * P is an ASCII Letter 'P' (0x50)\n" - " * 4 is an ASCII Letter '4' (0x34)\n" - " * Version is currently 1 (0x01)\n" - " * Type is currently 1 (0x01)\n" - " *\n" - " * The device receives a packet, do the classification, fills in the\n" - " * result and sends the packet back out of the same port it came in on, while\n" - " * swapping the source and destination addresses.\n" - " *\n" - " * If an unknown operation is specified or the header is not valid, the packet\n" - " * is dropped\n" - " */\n\n") - - -def separate_metadata(fname, config): - with open(fname, 'a') as headers: - # write the metadata struct - # headers.write("struct metadata_t {\n") - for i in range(0, config['num_features']): - headers.write( - " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") - headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") - for t in range(config['num_trees']): - headers.write(" bit<4> tree_" + str(t) + "_vote;\n") - for t in range(config['num_trees']): - headers.write(" bit<7> tree_" + str(t) + "_prob;\n") - headers.write(" bit<32> DstAddr;\n") - # headers.write("}\n\n") - -def separate_logics(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" lookup_feature" + str(i) + ".apply();\n") - for i in range(config['num_trees']): - ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") - ingress.write(" decision.apply();\n") - - -def separate_tables(fname, config): - with open(fname, 'a') as ingress: - for i in range(0, config['num_features']): - ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( - int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ - " meta_code = tree;\n" \ - " }\n\n") - - for i in range(0, config['num_features']): - ingress.write(" @pragma stage 0\n") - ingress.write(" table lookup_feature" + str(i) + " {\n" \ - " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ - " actions = {\n" \ - " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ - " NoAction;\n" \ - " }\n" \ - " size = " + str( config['feature_table_depth'][i]) + ";\n" \ - " default_action = NoAction;\n" \ - " }\n\n") - - for i in range(config['num_trees']): - ingress.write("\n action read_prob" + str(i) + "(") - ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" - " meta.tree_" + str(i) + "_prob" + " = prob;\n" - " meta.tree_" + str(i) + "_vote" + " = vote;\n" - " }\n") - - ingress.write(" action write_default_class" + str(i) + "() {\n" - " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" - " }\n\n") - - count_code = {} - for j in range(0, config['num_features']): - count_code[j] = 0 - for i in range(config['num_trees']): - ingress.write(" @pragma stage 1\n") - ingress.write(" table lookup_leaf_id" + str(i) + " {\n" - " key = { ") - for j in range(0, config['num_features']): - ingress.write( - "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") - count_code[j] += config['code_width'][i][j] - ingress.write("}\n") - ingress.write(" actions={\n" - " read_prob" + str(i) + ";\n" - " write_default_class" + str(i) + ";\n" - " }\n") - - ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" - " default_action = write_default_class" + str(i) + ";\n" - " }\n\n") - - ingress.write(" action read_lable(bit<32> label){\n" - " hdr.Planter.result = label;\n" - " }\n\n") - ingress.write(" action write_default_decision() {\n" - " hdr.Planter.result = " + str( config['default label']) + ";\n" - " }\n\n") - ingress.write(" table decision {\n key = { ") - for t in range(config['num_trees']): - ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") - ingress.write("}\n") - ingress.write(" actions={\n" - " read_lable;\n" - " write_default_decision;\n" - " }\n") - ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" - " default_action = write_default_decision;\n" - " }\n\n") -################################################### -# Create a tables load script -# input: table script file name, tables data json file name, configuration -# output: none -################################################### - - -def ten_to_bin(num,count): - num = bin(num).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def create_tables_Commend(fname, config): - num_features = config['data config']['number of features'] - num_classes = config['model config']['number of classes'] - num_trees = config['model config']['number of trees'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - with open(fname, 'w') as file: - - for f in range(num_features): - for idx in Ternary_Table['feature ' + str(f)]: - priority = int(idx) - key = Ternary_Table['feature ' + str(f)][idx][1] - mask = Ternary_Table['feature ' + str(f)][idx][0] - codes = '' - for t in range(num_trees): - c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] - c_len = config['p4 config']['width of code'][t][f] - codes = ten_to_bin(int(c_tree), int(c_len)) + codes - label = int(codes, 2) - file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ - " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") - - file.write("\n") - - - for t in range(num_trees): - for idx in Ternary_Table['tree ' + str(t)]: - file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") - for f in range(num_features): - file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") - file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") - - file.write("\n") - - for idx in Ternary_Table['decision']: - file.write("table_add SwitchIngress.decision read_lable ") - for t in range(num_trees): - file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") - file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") - - - -def create_load_tables(fname, fjson, config, Planter_config, file_name): - work_root = Planter_config['directory config']['work'] - - commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - commend_file = work_root + "/Tables/s1-commands.txt" - create_tables_Commend(commend_file, Planter_config) - - - config['debug_load_table'] = False - with open(fname, 'a') as tload: - tload.write("import json\n" \ - "import os\n" \ - "import binascii\n" \ - "import sys\n" + \ - ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ - "os.chdir('" + work_root + "')\n")) + \ - "print('working dir: ' + os.getcwd())\n" \ - "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ - "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ - "config = Planter_config['p4 config']\n\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") - tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") - - tload.write("def ten_to_bin(num, count):\n") - tload.write(" num = bin(num).lstrip('0b')\n") - tload.write(" if len(num) != count:\n") - tload.write(" cont = count - len(num)\n") - tload.write(" num = cont * '0' + num\n") - tload.write(" return num\n\n") - - - for i in range(0, config['num_features']): - tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ - "for k in range(len(table['feature " + str( i) + "'].keys())):\n") - tload.write(" key = str(k)\n") - tload.write(" codes = ''\n") - tload.write(" for tree in range(config['number of trees']):\n") - - tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + - "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") - - tload.write(" " + (config['debug_load_table'] * "# ") + - "Ingress.lookup_feature" + str(i) + - ".add_with_extract_feature" + str(i) + - "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + - "'][key][0], int(key), int(codes,2))\n") - if config['debug_load_table']: - tload.write( - " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( - i) + \ - "'][key][1],table['feature " + str(i) + \ - "'][key][0], int(key), int(codes,2)), end='')\n\n") - - # Load tree tables - for i in range(0, config['num_trees']): - tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( - i) + "'].keys()),'entries')\n") - tload.write("for key in table['tree " + str(i) + "']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.lookup_leaf_id" + str( - i) + ".add_with_read_prob" + str( - i) + "(") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for f in range(config['num_features']): - tload.write(" f" + str(f) + "_code: {}") - tload.write(" vote: {}'.format(key, ") - for f in range(config['num_features']): - tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") - tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") - - # Load decision tables - tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") - tload.write("for key in table['decision']:\n") - tload.write(" " + (config['debug_load_table'] * "# ") + \ - "Ingress.decision.add_with_read_lable(") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class'])\n") - if config['debug_load_table']: - tload.write(" print('\\r{}th entry ——") - for t in range(config['num_trees']): - tload.write(" tree" + str(f) + "_vote: {}") - tload.write(" class: {}'.format(key, ") - for t in range(config['num_trees']): - tload.write("table['decision'][key]['t" + str(t) + " vote'], ") - tload.write("table['decision'][key]['class']), end='')\n\n") +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF THE LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is a P4 generator of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import json + + +def load_config(fname): + Planter_config = json.load(open('src/configs/' + fname, 'r')) + config_file = Planter_config['p4 config'] + config = {} + config['num_features'] = config_file["number of features"] + config['num_trees'] = config_file["number of trees"] + config['num_classes'] = config_file["number of classes"] + config['column_width'] = config_file['width of feature'] + config['result_width'] = config_file['width of result'] + config['code_width'] = config_file['width of code'] + config['feature_table_depth'] = config_file['used columns'] + config['headers_list'] = config_file['standard headers'] + config['code_tbl_depth'] = config_file['code table size'] + config["decision_table_size"] = config_file["decision table size"] + config['probability_width'] = config_file['width of probability'] + config['model'] = config_file['model'] + config['default label'] = config_file["default label"] + config['default_vote'] = config_file["default vote"] + return config, Planter_config + + +def add_model_intro(fname, config): + with open(fname, 'a') as intro: + intro.write("/*\n" + " * Planter\n" + " *\n" + " * This program implements a simple protocol. It can be carried over Ethernet\n" + " * (Ethertype 0x1234).\n" + " *\n" + " * The Protocol header looks like this:\n" + " *\n" + " * 0 1 2 3\n" + " * +----------------+----------------+----------------+---------------+\n" + " * | P | 4 | Version | Type |\n" + " * +----------------+----------------+----------------+---------------+\n") + for f in range(config['num_features']): + intro.write( " * | feature"+str(f)+" |\n" + " * +----------------+----------------+----------------+---------------+\n") + intro.write( " * | Result |\n" + " * +----------------+----------------+----------------+---------------+\n" + " *\n" + " * P is an ASCII Letter 'P' (0x50)\n" + " * 4 is an ASCII Letter '4' (0x34)\n" + " * Version is currently 1 (0x01)\n" + " * Type is currently 1 (0x01)\n" + " *\n" + " * The device receives a packet, do the classification, fills in the\n" + " * result and sends the packet back out of the same port it came in on, while\n" + " * swapping the source and destination addresses.\n" + " *\n" + " * If an unknown operation is specified or the header is not valid, the packet\n" + " * is dropped\n" + " */\n\n") + + +def separate_metadata(fname, config): + with open(fname, 'a') as headers: + # write the metadata struct + # headers.write("struct metadata_t {\n") + for i in range(0, config['num_features']): + headers.write( + " bit<" + str(int(sum(np.array(config['code_width'])[:, i]))) + "> code_f" + str(i) + ";\n") + headers.write(" bit<" + str(config['probability_width']) + "> sum_prob" + ";\n") + for t in range(config['num_trees']): + headers.write(" bit<4> tree_" + str(t) + "_vote;\n") + for t in range(config['num_trees']): + headers.write(" bit<7> tree_" + str(t) + "_prob;\n") + headers.write(" bit<32> DstAddr;\n") + # headers.write("}\n\n") + +def separate_logics(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" lookup_feature" + str(i) + ".apply();\n") + for i in range(config['num_trees']): + ingress.write(" lookup_leaf_id" + str(i) + ".apply();\n") + ingress.write(" decision.apply();\n") + + +def separate_tables(fname, config): + with open(fname, 'a') as ingress: + for i in range(0, config['num_features']): + ingress.write(" action extract_feature" + str(i) + "(out bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> meta_code, bit<" + str( + int(sum(np.array(config['code_width'])[:, i]))) + "> tree){\n" \ + " meta_code = tree;\n" \ + " }\n\n") + + for i in range(0, config['num_features']): + ingress.write(" @pragma stage 0\n") + ingress.write(" table lookup_feature" + str(i) + " {\n" \ + " key = { hdr.Planter.feature" + str(i) + ":ternary; }\n" \ + " actions = {\n" \ + " extract_feature" + str(i) + "(meta.code_f" + str(i) + ");\n" \ + " NoAction;\n" \ + " }\n" \ + " size = " + str( config['feature_table_depth'][i]) + ";\n" \ + " default_action = NoAction;\n" \ + " }\n\n") + + for i in range(config['num_trees']): + ingress.write("\n action read_prob" + str(i) + "(") + ingress.write("bit<" + str(config['probability_width']) + "> prob, bit<4> vote){\n" + " meta.tree_" + str(i) + "_prob" + " = prob;\n" + " meta.tree_" + str(i) + "_vote" + " = vote;\n" + " }\n") + + ingress.write(" action write_default_class" + str(i) + "() {\n" + " meta.tree_" + str(i) + "_vote = " + str(config['default_vote']) + ";\n" + " }\n\n") + + count_code = {} + for j in range(0, config['num_features']): + count_code[j] = 0 + for i in range(config['num_trees']): + ingress.write(" @pragma stage 1\n") + ingress.write(" table lookup_leaf_id" + str(i) + " {\n" + " key = { ") + for j in range(0, config['num_features']): + ingress.write( + "meta.code_f" + str(j) + "[" + str(int(count_code[j] + config['code_width'][i][j] - 1)) + ":" + str(int(count_code[j])) + "]:exact;\n ") + count_code[j] += config['code_width'][i][j] + ingress.write("}\n") + ingress.write(" actions={\n" + " read_prob" + str(i) + ";\n" + " write_default_class" + str(i) + ";\n" + " }\n") + + ingress.write(" size = " + str(config['code_tbl_depth'][i]) + ";\n" + " default_action = write_default_class" + str(i) + ";\n" + " }\n\n") + + ingress.write(" action read_lable(bit<32> label){\n" + " hdr.Planter.result = label;\n" + " }\n\n") + ingress.write(" action write_default_decision() {\n" + " hdr.Planter.result = " + str( config['default label']) + ";\n" + " }\n\n") + ingress.write(" table decision {\n key = { ") + for t in range(config['num_trees']): + ingress.write("meta.tree_" + str(t) + "_vote:exact;\n ") + ingress.write("}\n") + ingress.write(" actions={\n" + " read_lable;\n" + " write_default_decision;\n" + " }\n") + ingress.write(" size = " + str(config["decision_table_size"]) + ";\n" + " default_action = write_default_decision;\n" + " }\n\n") +################################################### +# Create a tables load script +# input: table script file name, tables data json file name, configuration +# output: none +################################################### + + +def ten_to_bin(num,count): + num = bin(num).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def create_tables_Commend(fname, config): + num_features = config['data config']['number of features'] + num_classes = config['model config']['number of classes'] + num_trees = config['model config']['number of trees'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + with open(fname, 'w') as file: + + for f in range(num_features): + for idx in Ternary_Table['feature ' + str(f)]: + priority = int(idx) + key = Ternary_Table['feature ' + str(f)][idx][1] + mask = Ternary_Table['feature ' + str(f)][idx][0] + codes = '' + for t in range(num_trees): + c_tree = Ternary_Table['feature ' + str(f)][idx][2][t] + c_len = config['p4 config']['width of code'][t][f] + codes = ten_to_bin(int(c_tree), int(c_len)) + codes + label = int(codes, 2) + file.write("table_add SwitchIngress.lookup_feature" + str(f)+" extract_feature" + str(f)+ + " "+str(key)+"&&&"+str(mask)+" => "+str(label)+" "+str(priority)+"\n") + + file.write("\n") + + + for t in range(num_trees): + for idx in Ternary_Table['tree ' + str(t)]: + file.write("table_add SwitchIngress.lookup_leaf_id" + str(t) + " read_prob" + str(t) + " ") + for f in range(num_features): + file.write(str(Ternary_Table['tree ' + str(t)][idx]['f' + str(f) + ' code']) + " ") + file.write("=> 0 " + str(Ternary_Table['tree ' + str(t)][idx]['leaf']) + "\n") + + file.write("\n") + + for idx in Ternary_Table['decision']: + file.write("table_add SwitchIngress.decision read_lable ") + for t in range(num_trees): + file.write(str(Ternary_Table['decision'][idx]['t' + str(t) + ' vote'])+" ") + file.write("=> "+str(Ternary_Table['decision'][idx]['class'])+"\n") + + + +def create_load_tables(fname, fjson, config, Planter_config, file_name): + work_root = Planter_config['directory config']['work'] + + commend_file = work_root + "/src/targets/bmv2/software/model_test/test_environment/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + commend_file = work_root + "/Tables/s1-commands.txt" + create_tables_Commend(commend_file, Planter_config) + + + config['debug_load_table'] = False + with open(fname, 'a') as tload: + tload.write("import json\n" \ + "import os\n" \ + "import binascii\n" \ + "import sys\n" + \ + ((not config['debug_load_table']) * ("sys.path.append('" + work_root + "')\n" \ + "os.chdir('" + work_root + "')\n")) + \ + "print('working dir: ' + os.getcwd())\n" \ + "table = json.load(open('./Tables/" + fjson + "','r'))\n" \ + "Planter_config = json.load(open('./src/configs/Planter_config.json','r'))\n"\ + "config = Planter_config['p4 config']\n\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress = bfrt."+file_name+".pipe.SwitchIngress\n") + tload.write((config['debug_load_table']) * ('# ') + "Ingress.clear()" + "\n\n") + + tload.write("def ten_to_bin(num, count):\n") + tload.write(" num = bin(num).lstrip('0b')\n") + tload.write(" if len(num) != count:\n") + tload.write(" cont = count - len(num)\n") + tload.write(" num = cont * '0' + num\n") + tload.write(" return num\n\n") + + + for i in range(0, config['num_features']): + tload.write("print('load feature " + str(i) + " table with',len(table['feature " + str( i) + "'].keys()),'entries')\n" \ + "for k in range(len(table['feature " + str( i) + "'].keys())):\n") + tload.write(" key = str(k)\n") + tload.write(" codes = ''\n") + tload.write(" for tree in range(config['number of trees']):\n") + + tload.write(" codes = ten_to_bin(int(table['feature " + str( i) + + "'][key][2][tree]), int(config['width of code'][tree][" + str(i) + "])) + codes\n") + + tload.write(" " + (config['debug_load_table'] * "# ") + + "Ingress.lookup_feature" + str(i) + + ".add_with_extract_feature" + str(i) + + "(table['feature " + str(i) + "'][key][1], table['feature " + str( i) + + "'][key][0], int(key), int(codes,2))\n") + if config['debug_load_table']: + tload.write( + " print('\\r{}th entry —— feature value: {} mask: {} priority: {} codes: {}'.format(key, table['feature " + str( + i) + \ + "'][key][1],table['feature " + str(i) + \ + "'][key][0], int(key), int(codes,2)), end='')\n\n") + + # Load tree tables + for i in range(0, config['num_trees']): + tload.write("print('load tree (code/code to vote) " + str(i) + " table with',len(table['tree " + str( + i) + "'].keys()),'entries')\n") + tload.write("for key in table['tree " + str(i) + "']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.lookup_leaf_id" + str( + i) + ".add_with_read_prob" + str( + i) + "(") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("0, table['tree " + str(i) + "'][key]['leaf'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for f in range(config['num_features']): + tload.write(" f" + str(f) + "_code: {}") + tload.write(" vote: {}'.format(key, ") + for f in range(config['num_features']): + tload.write("table['tree " + str(i) + "'][key]['f" + str(f) + " code'], ") + tload.write("int(table['tree " + str(i) + "'][key]['leaf'])), end='')\n\n") + + # Load decision tables + tload.write("print('load vote to class (decision) table with',len(table['decision'].keys()),'entries')\n") + tload.write("for key in table['decision']:\n") + tload.write(" " + (config['debug_load_table'] * "# ") + \ + "Ingress.decision.add_with_read_lable(") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class'])\n") + if config['debug_load_table']: + tload.write(" print('\\r{}th entry ——") + for t in range(config['num_trees']): + tload.write(" tree" + str(f) + "_vote: {}") + tload.write(" class: {}'.format(key, ") + for t in range(config['num_trees']): + tload.write("table['decision'][key]['t" + str(t) + " vote'], ") + tload.write("table['decision'][key]['class']), end='')\n\n") diff --git a/src/models/XGB/Type_EB_auto/readme.md b/src/models/XGB/Type_EB_auto/readme.md index b91f444..50d6d54 100644 --- a/src/models/XGB/Type_EB_auto/readme.md +++ b/src/models/XGB/Type_EB_auto/readme.md @@ -1 +1 @@ -This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains Planter-supported ML modules (training, algortihm mapping, and ML-related P4 generation) for XGB. ```dedicated_p4.py``` generates the P4 code dedicated to this ML model and ```table_generator.py``` generate ML model parameters in the format of table enries. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. diff --git a/src/models/XGB/Type_EB_auto/table_generator.py b/src/models/XGB/Type_EB_auto/table_generator.py index b0ad1b5..8180b5f 100755 --- a/src/models/XGB/Type_EB_auto/table_generator.py +++ b/src/models/XGB/Type_EB_auto/table_generator.py @@ -1,583 +1,583 @@ -# THIS FILE IS PART OF Planter PROJECT -# Planter.py - The core part of the Planter library -# -# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 -# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES -# -# Copyright (c) 2020-2021 Changgang Zheng -# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford -# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com -# -# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. -# Please refer to ./Docs/Planter_User_Document.pdf or further information. - -import numpy as np -import pandas as pd -from pandas import Series,DataFrame -from pandas import plotting -import os -import copy -import math -import time -import re -import json - -# %matplotlib inline -import matplotlib.pyplot as plt -plt.style.use('seaborn') -import seaborn as sns -sns.set_style("whitegrid") - -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import LabelEncoder -from sklearn.neighbors import KNeighborsClassifier -from sklearn import svm -from sklearn import metrics -from sklearn.tree import _tree -from sklearn import tree -from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier -from IPython.display import Image -import pydotplus -from sklearn.metrics import classification_report -import xgboost as xgb -from sklearn.metrics import * - -from src.functions.Range_to_TCAM_Top_Down import * -from src.functions.json_encoder import * -from src.functions.config_modification import * -from src.functions.input_CLI import * - -def map(value): - value = value - return value - -def get_path(model, conditions, path, num, leaf_info, tree_index): - if 'children' in model.keys(): - conditions_yes = copy.deepcopy(conditions) - conditions_no = copy.deepcopy(conditions) - if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: - conditions_yes[model["split"]][1] = map(model["split_condition"])-1 - if conditions_no[model["split"]][0] < map(model["split_condition"]) : - conditions_no[model["split"]][0] = map(model["split_condition"]) - for child_model in model["children"]: - if child_model["nodeid"]==model["yes"]: - path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) - if child_model["nodeid"]==model["no"]: - path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) - else: - # print(path, conditions) - path['path '+str(num)] = conditions - path['path '+str(num)]['leaf'] = model["leaf"] - # leaf_info['tree '+str(tree_index)] += [model["leaf"]] - leaf_info['tree ' + str(tree_index)] += [round(model["leaf"]/trade_off_factor, 1)] - if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] - elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] - num += 1 - return path, num, leaf_info - - -def find_feature_split(model, tree_index, num_features, feature_names): - count_layer = 0 - count_route = 0 - count_list = 0 - layer = {} - route = {} - layer[count_layer] = {} - layer[count_layer][count_list] = {} - layer[count_layer][count_list]["lst"] = [0] - layer[count_layer][count_list]["tab"] = model - feature_split = {} - num_features = len(feature_names) - - for i in range(num_features): - feature_split["feature " + str(i)] = [] - while True: - if len(layer[count_layer].keys()) == 0: - break - layer[count_layer + 1] = {} - count_list = 0 - for list_id in layer[count_layer]: - feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ - layer[count_layer][list_id]["tab"]["split_condition"]] - # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range - - for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): - if "children" not in children.keys(): - route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] - count_route += 1 - else: - layer[count_layer + 1][count_list] = {} - layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ - children["nodeid"]] - layer[count_layer + 1][count_list]["tab"] = children - count_list += 1 - count_layer += 1 - for f in range(num_features): - feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) - return feature_split - -def generate_feature_tables(split, num_features,feature_max, table): - for i in range(num_features): - table["feature "+str(i)] = {} - count_code = 0 - nife = sorted(split["feature "+str(i)]) - for j in range(int(feature_max[i]+1)): - if nife !=[] : - if len(nife) > count_code: - if j == nife[count_code]: - count_code+=1 - table["feature " + str(i)][j] = count_code - return table - -def path_to_path_to_leaf(path, num_features, table, leaf_code_list): - path_to_leaf ={} - for p in path: - path_to_leaf[p] = {} - # path_to_leaf[p]['leaf'] = path[p]['leaf'] - path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf']/trade_off_factor, 1)) - # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) - for f in range(num_features): - ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] - end = table['feature '+str(f)][path[p]['f'+str(f)][1]] - path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() - return path_to_leaf - - -def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): - conditions = {} - for i in range(num_features): - conditions["f" + str(i)] = [0, feature_max[i]] - feature_split["feature " + str(i)] += [feature_max[i]] - - path = {} - path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) - leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) - path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) - return path_to_leaf, leaf_info - -def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): - if feature_num == num_features: - table['code to vote'][count] = {} - for f in range(num_features): - table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] - table['code to vote'][count]['leaf'] = leaf_path['leaf'] - count += 1 - return table, count - else: - for value in leaf_path['feature '+str(feature_num)]: - code_dict['feature ' + str(feature_num)] = value - feature_num += 1 - table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) - feature_num -= 1 - return table, count - -def generate_code_table(table, path_to_leaf, num_features): - table['code to vote'] = {} - count = 0 - for p in path_to_leaf: - table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) - return table - -def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): - - feature_split = find_feature_split(model, tree_index, num_features, feature_names) - g_table[tree_index] = {} - g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) - leaf_info['tree '+str(tree_index)] = [] - path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) - - code_width_for_feature = np.zeros(num_features) - for i in range(num_features): - code_width_for_feature[i] = np.ceil(math.log( - g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) - g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) - print('\rThe table for Tree: {} is generated'.format(tree_index), end="") - return g_table, leaf_info - - -def ten_to_bin(num,count): - num = bin(int(num)).lstrip('0b') - - if len(num) != count: - cont = count - len(num) - num = cont * '0' + num - return num - -def MaxMin_Norm_with_range(x, min , max, ranges = 10): - """[0,1] normaliaztion""" - x = (x - min) / (max - min) - return np.floor(ranges*x) - -def run_model(train_X, train_y, test_X, test_y, used_features): - - Planter_config = reload_config('src/configs/Planter_config.json') - - # =================== set tree numbers in config =================== - question = 'Number of trees?' - default = 5 - Planter_config = take_CLI_input(Planter_config, 'model config', 'number of trees', question, default, - manually_input = False, numeric=True) - - # =================== set depth in config =================== - question = 'Number of depth?' - default = 4 - Planter_config = take_CLI_input(Planter_config, 'model config', 'number of depth', question, default, - manually_input = False, numeric=True) - - # =================== set max leaf node in config =================== - question = 'Number of leaf nodes?' - default = 1000 - Planter_config = take_CLI_input(Planter_config, 'model config', 'max number of leaf nodes', question, default, - manually_input = False, numeric=True) - - # =================== accuracy trade-offs factor in config =================== - question = 'Value of accuracy trade-offs factor?' - default = 1 - Planter_config = take_CLI_input(Planter_config, 'model config', 'trade-offs factor', question, default, - manually_input = False, numeric=True) - - global trade_off_factor - trade_off_factor = Planter_config['model config']['trade-offs factor'] - - Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) - - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - - feature_names = [] - for i, f in enumerate(used_features): - train_X.rename(columns={f: "f"+str(i)}, inplace=True) - test_X.rename(columns={f: "f" + str(i)}, inplace=True) - feature_names+=["f"+str(i)] - - feature_max = [] - for i in feature_names: - t_t = [test_X[[i]].max()[0], train_X[[i]].max()[0]] - feature_max += [int(np.max(t_t)+1)] - - # =================== train model timer =================== - Planter_config['timer log']['train model'] = {} - Planter_config['timer log']['train model']['start'] = time.time() - # =================== train model timer =================== - - # XGBoost - - data_train = xgb.DMatrix(train_X, label=train_y) - data_test = xgb.DMatrix(test_X, label=test_y) - watchlist = [(data_test, 'eval'), (data_train, 'train')] - param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} - bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) - - # param = {'max_depth': 8, 'num_class': 2} - # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) - bst.dump_model("src/temp/tree.txt") - sklearn_y_predict = bst.predict(data_test) - - result = classification_report(test_y, sklearn_y_predict) - # exit() - result = classification_report(test_y, sklearn_y_predict, digits=4) - print('\n', result) - - # =================== train model timer =================== - Planter_config['timer log']['train model']['end'] = time.time() - # =================== train model timer =================== - - # =================== convert model timer =================== - Planter_config['timer log']['convert model'] = {} - Planter_config['timer log']['convert model']['start'] = time.time() - # =================== convert model timer =================== - - log_file = 'src/logs/log.json' - if os.path.exists(log_file): - log_dict = json.load(open(log_file, 'r')) - else: - log_dict = {} - - if ("num_feature: " + str(num_features)) not in log_dict: - log_dict["num_feature: " + str(num_features)] = {} - if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} - if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ - "num_tree: " + str(num_trees)]: - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ - "num_depth: " + str(num_depth)] = {} - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "classification_report"] = result - log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ - "max number of leaf nodes"] = max_leaf_nodes - json.dump(log_dict, open(log_file, 'w'), indent=4) - print('Classification results are downloaded to log as', log_file) - - - the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") - xgb_model = {} - for i, m in enumerate(the_model): - xgb_model[i] = json.loads(m) - - - - g_table = {} - # feature_names = test_X.columns.T.tolist() - leaf_info ={} - leaf_info['max value'] = 0 - leaf_info['min value'] = 0 - for idx in xgb_model: - estimator = xgb_model[idx] - g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) - - - - def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): - if tree_num == num_trees: - vote = np.zeros(num_classes).tolist() - for t in range(num_trees): - vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] - # if vote.index(np.max(vote))== 0: - # if True : - g_table['votes to class'][num] = {} - for t in range(len(vote_list)): - g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] - g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) - num += 1 - return g_table, num - else: - for value in range(len(leaf_info["tree "+str(tree_num)])): - vote_list[tree_num] = value - tree_num += 1 - g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) - tree_num -= 1 - return g_table, num - - - ranges = 10 - g_table['votes to class'] = {} - print("\nGenerating vote to class table...",end="") - g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) - print('Done') - - feature_width = [] - for maxs in feature_max: - feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] - - - code_width_tree_feature = np.zeros((num_trees,num_features)) - for i in range(num_features): - for tree in range(num_trees): - # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) - code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) - # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) - # print('stop') - - - Ternary_Table = {} - Ternary_Table['decision'] = g_table['votes to class'] - - for tree in range(num_trees): - Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] - - for i in range(num_features): - Ternary_Table['feature '+str(i)] = {} - for value in range(feature_max[i]): - Ternary_Table['feature ' + str(i)][value] = [] - for tree in range(num_trees): - Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] - Exact_Table = copy.deepcopy(Ternary_Table) - for i in range(num_features): - if i!=0: - print('') - print('Begine transfer: Feature table ' + str(i)) - Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) - - - # ===================== prepare default vote ========================= - print("\nPreparing default vote...", end="") - collect_votes = [] - for t in range(num_trees): - for idx in Exact_Table['tree ' + str(t)]: - collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] - default_vote = max(collect_votes, key=collect_votes.count) - - code_table_size = 0 - for t in range(num_trees): - Ternary_Table['tree ' + str(t)] = {} - for idx in Exact_Table['tree ' + str(t)]: - if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: - Ternary_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] - code_table_size += 1 - Exact_Table['tree ' + str(t)] = copy.deepcopy(Ternary_Table['tree ' + str(t)]) - print('Done') - # ===================== prepare default class ========================= - print("Preparing default class...", end="") - collect_class = np.zeros(num_classes).tolist() - for idx in Exact_Table['decision']: - collect_class[Exact_Table['decision'][idx]['class']] += 1 - default_class = collect_class.index(max(collect_class)) - - code_table_size = 0 - Ternary_Table['decision'] = {} - for idx in Exact_Table['decision']: - if Exact_Table['decision'][idx]['class'] != default_class: - Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] - code_table_size += 1 - Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) - print('Done') - - # =================== convert model timer =================== - Planter_config['timer log']['convert model']['end'] = time.time() - # =================== convert model timer =================== - - json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) - print('Ternary_Table is generated') - json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) - print('Exact_Table is generated') - - - Planter_config['p4 config'] = {} - Planter_config['p4 config']["model"] = "XGB" - Planter_config['p4 config']["number of features"] = num_features - Planter_config['p4 config']["number of classes"] = num_classes - Planter_config['p4 config']["number of trees"] = num_trees - Planter_config['p4 config']['table name'] = 'Ternary_Table.json' - Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys())+1 - Planter_config['p4 config']["code table size"] = [] - for tree in range(num_trees): - Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree ' + str(tree)].keys())+1] - Planter_config['p4 config']["default vote"] = default_vote - Planter_config['p4 config']["default label"] = default_class - Planter_config['p4 config']["width of feature"] = feature_width - Planter_config['p4 config']["width of code"] = code_width_tree_feature - Planter_config['p4 config']["used columns"] = [] - for i in range(num_features): - Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())+1] - Planter_config['p4 config']["width of probability"] = 7 - Planter_config['p4 config']["width of result"] = 8 - Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] - if 'test config' not in Planter_config.keys(): - Planter_config['test config'] = {} - Planter_config['test config']['type of test'] = 'classification' - - # dump the config file - dump_config(Planter_config, 'src/configs/Planter_config.json') - print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') - - # main() - return sklearn_y_predict.tolist() - - - -def test_tables(sklearn_test_y, test_X, test_y): - - - - config_file = 'src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - num_features = Planter_config['data config']['number of features'] - num_classes = Planter_config['model config']['number of classes'] - num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) - num_depth = Planter_config['model config']['number of depth'] - max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] - Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) - Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) - - - print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') - same = 0 - correct = 0 - error = 0 - switch_test_y = [] - for i in range(np.shape(test_X.values)[0]): - vote_list = np.zeros(num_trees).astype(dtype=int).tolist() - for tree in range(num_trees): - code_list = np.zeros(num_features) - ternary_code_list = np.zeros(num_features) - input_feature_value = test_X.values[i] - - for f in range(num_features): - match_or_not = False - - # matcg ternary - TCAM_table = Ternary_Table['feature ' + str(f)] - keys = list(TCAM_table.keys()) - - for count in keys: - - if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: - ternary_code_list[f] = TCAM_table[count][2][tree] - match_or_not = True - break - - if not match_or_not: - print('feature table not matched') - # matcg exact - code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] - if not match_or_not: - print('feature table not matched') - if str(code_list)!=str(ternary_code_list): - print('error in exact to ternary match', code_list,ternary_code_list) - for key in Exact_Table["tree " + str(tree)]: - - match_or_not = False - all_True = True - for code_f in range(num_features): - if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: - all_True = False - break - if all_True: - vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) - match_or_not = True - break - if not match_or_not: - vote_list[tree] = Planter_config['p4 config']["default vote"] - - - for key in Exact_Table['decision']: - match_or_not = False - all_True = True - for tree_v in range(num_trees): - if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: - all_True = False - break - if all_True: - switch_prediction = Exact_Table['decision'][key]['class'] - match_or_not = True - break - if not match_or_not: - switch_prediction = Planter_config['p4 config']["default label"] - - - switch_test_y += [switch_prediction] - if switch_prediction == test_y[i]: - correct += 1 - - if switch_prediction == sklearn_test_y[i]: - same += 1 - else: - error += 1 - - if i % 1 == 0 and i!=0: - print( - '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( - switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), - accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") - - - print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) - result = classification_report(switch_test_y, test_y, digits=4) - print('\n', result) - - -def resource_prediction(): - - config_file = './src/configs/Planter_config.json' - Planter_config = json.load(open(config_file, 'r')) - - print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) - print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) - - +# THIS FILE IS PART OF Planter PROJECT +# Planter.py - The core part of the Planter library +# +# THIS PROGRAM IS FREE SOFTWARE TOOL, WHICH MAPS MACHINE LEARNING ALGORITHMS TO DATA PLANE, IS LICENSED UNDER Apache-2.0 +# YOU SHOULD HAVE RECEIVED A COPY OF WTFPL LICENSE, IF NOT, PLEASE CONTACT THE FOLLOWING E-MAIL ADDRESSES +# +# Copyright (c) 2020-2021 Changgang Zheng +# Copyright (c) Computing Infrastructure Lab, Department of Engineering Science, University of Oxford +# E-mail: changgang.zheng@eng.ox.ac.uk or changgangzheng@qq.com +# +# Functions: This file is responsible for training, algorithm mapping, and software testing of the ML model. +# Please refer to ./Docs/Planter_User_Document.pdf or further information. + +import numpy as np +import pandas as pd +from pandas import Series,DataFrame +from pandas import plotting +import os +import copy +import math +import time +import re +import json + +# %matplotlib inline +import matplotlib.pyplot as plt +plt.style.use('seaborn-v0_8') +import seaborn as sns +sns.set_style("whitegrid") + +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import LabelEncoder +from sklearn.neighbors import KNeighborsClassifier +from sklearn import svm +from sklearn import metrics +from sklearn.tree import _tree +from sklearn import tree +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from IPython.display import Image +import pydotplus +from sklearn.metrics import classification_report +import xgboost as xgb +from sklearn.metrics import * + +from src.functions.Range_to_TCAM_Top_Down import * +from src.functions.json_encoder import * +from src.functions.config_modification import * +from src.functions.input_CLI import * + +def map(value): + value = value + return value + +def get_path(model, conditions, path, num, leaf_info, tree_index): + if 'children' in model.keys(): + conditions_yes = copy.deepcopy(conditions) + conditions_no = copy.deepcopy(conditions) + if conditions_yes[model["split"]][1] > map(model["split_condition"])-1: + conditions_yes[model["split"]][1] = map(model["split_condition"])-1 + if conditions_no[model["split"]][0] < map(model["split_condition"]) : + conditions_no[model["split"]][0] = map(model["split_condition"]) + for child_model in model["children"]: + if child_model["nodeid"]==model["yes"]: + path, num, leaf_info = get_path(child_model, conditions_yes, path, num, leaf_info, tree_index) + if child_model["nodeid"]==model["no"]: + path, num, leaf_info = get_path(child_model, conditions_no, path, num, leaf_info, tree_index) + else: + # print(path, conditions) + path['path '+str(num)] = conditions + path['path '+str(num)]['leaf'] = model["leaf"] + # leaf_info['tree '+str(tree_index)] += [model["leaf"]] + leaf_info['tree ' + str(tree_index)] += [round(model["leaf"]/trade_off_factor, 1)] + if model["leaf"] > leaf_info['max value']: leaf_info['max value'] = model["leaf"] + elif model["leaf"] < leaf_info['min value']: leaf_info['min value'] = model["leaf"] + num += 1 + return path, num, leaf_info + + +def find_feature_split(model, tree_index, num_features, feature_names): + count_layer = 0 + count_route = 0 + count_list = 0 + layer = {} + route = {} + layer[count_layer] = {} + layer[count_layer][count_list] = {} + layer[count_layer][count_list]["lst"] = [0] + layer[count_layer][count_list]["tab"] = model + feature_split = {} + num_features = len(feature_names) + + for i in range(num_features): + feature_split["feature " + str(i)] = [] + while True: + if len(layer[count_layer].keys()) == 0: + break + layer[count_layer + 1] = {} + count_list = 0 + for list_id in layer[count_layer]: + feature_split["feature " + str(feature_names.index(layer[count_layer][list_id]["tab"]["split"]))] += [ + layer[count_layer][list_id]["tab"]["split_condition"]] + # (optional add -1)The -1 means the feature splits is for <= =, so each split is largest value in each range + + for i, children in enumerate(layer[count_layer][list_id]["tab"]["children"]): + if "children" not in children.keys(): + route[count_route] = layer[count_layer][list_id]["lst"] + [children["nodeid"]] + count_route += 1 + else: + layer[count_layer + 1][count_list] = {} + layer[count_layer + 1][count_list]["lst"] = layer[count_layer][list_id]["lst"] + [ + children["nodeid"]] + layer[count_layer + 1][count_list]["tab"] = children + count_list += 1 + count_layer += 1 + for f in range(num_features): + feature_split['feature ' + str(f)] = sorted(list(set(feature_split['feature ' + str(f)]))) + return feature_split + +def generate_feature_tables(split, num_features,feature_max, table): + for i in range(num_features): + table["feature "+str(i)] = {} + count_code = 0 + nife = sorted(split["feature "+str(i)]) + for j in range(int(feature_max[i]+1)): + if nife !=[] : + if len(nife) > count_code: + if j == nife[count_code]: + count_code+=1 + table["feature " + str(i)][j] = count_code + return table + +def path_to_path_to_leaf(path, num_features, table, leaf_code_list): + path_to_leaf ={} + for p in path: + path_to_leaf[p] = {} + # path_to_leaf[p]['leaf'] = path[p]['leaf'] + path_to_leaf[p]['leaf'] = leaf_code_list.index(round(path[p]['leaf']/trade_off_factor, 1)) + # path_to_leaf[p]['leaf'] = leaf_code_list.index(path[p]['leaf']) + for f in range(num_features): + ini = table['feature '+str(f)][path[p]['f'+str(f)][0]] + end = table['feature '+str(f)][path[p]['f'+str(f)][1]] + path_to_leaf[p]['feature '+str(f)] = np.arange(ini,end+1).tolist() + return path_to_leaf + + +def find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, table, leaf_info, tree_index): + conditions = {} + for i in range(num_features): + conditions["f" + str(i)] = [0, feature_max[i]] + feature_split["feature " + str(i)] += [feature_max[i]] + + path = {} + path, _, leaf_info = get_path(model, conditions, path, 0, leaf_info, tree_index) + leaf_info['tree '+str(tree_index)] = sorted(list(set(leaf_info['tree '+str(tree_index)]))) + path_to_leaf = path_to_path_to_leaf(path, num_features, table, leaf_info['tree '+str(tree_index)] ) + return path_to_leaf, leaf_info + +def generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count): + if feature_num == num_features: + table['code to vote'][count] = {} + for f in range(num_features): + table['code to vote'][count]['f'+str(f)+' code'] = code_dict['feature ' + str(f)] + table['code to vote'][count]['leaf'] = leaf_path['leaf'] + count += 1 + return table, count + else: + for value in leaf_path['feature '+str(feature_num)]: + code_dict['feature ' + str(feature_num)] = value + feature_num += 1 + table, count = generate_code_table_for_path(table, leaf_path, code_dict, feature_num, num_features, count) + feature_num -= 1 + return table, count + +def generate_code_table(table, path_to_leaf, num_features): + table['code to vote'] = {} + count = 0 + for p in path_to_leaf: + table, count = generate_code_table_for_path(table, path_to_leaf[p], {}, 0, num_features, count) + return table + +def generate_table(model,tree_index, g_table, num_features, feature_names, feature_max, leaf_info): + + feature_split = find_feature_split(model, tree_index, num_features, feature_names) + g_table[tree_index] = {} + g_table[tree_index] = generate_feature_tables(feature_split, num_features, feature_max, g_table[tree_index]) + leaf_info['tree '+str(tree_index)] = [] + path_to_leaf, leaf_info = find_path_for_leaf_nodes(model, feature_split, feature_max, num_features, g_table[tree_index], leaf_info, tree_index) + + code_width_for_feature = np.zeros(num_features) + for i in range(num_features): + code_width_for_feature[i] = np.ceil(math.log( + g_table[tree_index]['feature ' + str(i)][np.max(list(g_table[tree_index]['feature ' + str(i)].keys()))] + 1, 2)) + g_table[tree_index] = generate_code_table(g_table[tree_index], path_to_leaf, num_features) + print('\rThe table for Tree: {} is generated'.format(tree_index), end="") + return g_table, leaf_info + + +def ten_to_bin(num,count): + num = bin(int(num)).lstrip('0b') + + if len(num) != count: + cont = count - len(num) + num = cont * '0' + num + return num + +def MaxMin_Norm_with_range(x, min , max, ranges = 10): + """[0,1] normaliaztion""" + x = (x - min) / (max - min) + return np.floor(ranges*x) + +def run_model(train_X, train_y, test_X, test_y, used_features): + + Planter_config = reload_config('src/configs/Planter_config.json') + + # =================== set tree numbers in config =================== + question = 'Number of trees?' + default = 5 + Planter_config = take_CLI_input(Planter_config, 'model config', 'number of trees', question, default, + manually_input = False, numeric=True) + + # =================== set depth in config =================== + question = 'Number of depth?' + default = 4 + Planter_config = take_CLI_input(Planter_config, 'model config', 'number of depth', question, default, + manually_input = False, numeric=True) + + # =================== set max leaf node in config =================== + question = 'Number of leaf nodes?' + default = 1000 + Planter_config = take_CLI_input(Planter_config, 'model config', 'max number of leaf nodes', question, default, + manually_input = False, numeric=True) + + # =================== accuracy trade-offs factor in config =================== + question = 'Value of accuracy trade-offs factor?' + default = 1 + Planter_config = take_CLI_input(Planter_config, 'model config', 'trade-offs factor', question, default, + manually_input = False, numeric=True) + + global trade_off_factor + trade_off_factor = Planter_config['model config']['trade-offs factor'] + + Planter_config['model config']['number of classes'] = int(np.max(train_y) + 1) + + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_boost_rounds = int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_trees = Planter_config['model config']['number of classes'] * int(int(Planter_config['model config']['number of trees']) / Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + + feature_names = [] + for i, f in enumerate(used_features): + train_X.rename(columns={f: "f"+str(i)}, inplace=True) + test_X.rename(columns={f: "f" + str(i)}, inplace=True) + feature_names+=["f"+str(i)] + + feature_max = [] + for i in feature_names: + t_t = [test_X[[i]].max().iloc[0], train_X[[i]].max().iloc[0]] + feature_max += [int(np.max(t_t)+1)] + + # =================== train model timer =================== + Planter_config['timer log']['train model'] = {} + Planter_config['timer log']['train model']['start'] = time.time() + # =================== train model timer =================== + + # XGBoost + + data_train = xgb.DMatrix(train_X, label=train_y) + data_test = xgb.DMatrix(test_X, label=test_y) + watchlist = [(data_test, 'eval'), (data_train, 'train')] + param = {'max_depth': num_depth, 'eta': 1, 'silent': 0, 'objective': 'multi:softmax', 'num_class': num_classes} + bst = xgb.train(param, data_train, num_boost_round=num_boost_rounds, evals=watchlist) + + # param = {'max_depth': 8, 'num_class': 2} + # bst = xgb.train(param, data_train, num_boost_round=200, evals=watchlist) + bst.dump_model("src/temp/tree.txt") + sklearn_y_predict = bst.predict(data_test) + + result = classification_report(test_y, sklearn_y_predict) + # exit() + result = classification_report(test_y, sklearn_y_predict, digits=4) + print('\n', result) + + # =================== train model timer =================== + Planter_config['timer log']['train model']['end'] = time.time() + # =================== train model timer =================== + + # =================== convert model timer =================== + Planter_config['timer log']['convert model'] = {} + Planter_config['timer log']['convert model']['start'] = time.time() + # =================== convert model timer =================== + + log_file = 'src/logs/log.json' + if os.path.exists(log_file): + log_dict = json.load(open(log_file, 'r')) + else: + log_dict = {} + + if ("num_feature: " + str(num_features)) not in log_dict: + log_dict["num_feature: " + str(num_features)] = {} + if ("num_tree: " + str(num_trees)) not in log_dict["num_feature: " + str(num_features)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)] = {} + if ("num_depth: " + str(num_depth)) not in log_dict["num_feature: " + str(num_features)][ + "num_tree: " + str(num_trees)]: + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)][ + "num_depth: " + str(num_depth)] = {} + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "classification_report"] = result + log_dict["num_feature: " + str(num_features)]["num_tree: " + str(num_trees)]["num_depth: " + str(num_depth)][ + "max number of leaf nodes"] = max_leaf_nodes + json.dump(log_dict, open(log_file, 'w'), indent=4) + print('Classification results are downloaded to log as', log_file) + + + the_model= bst.get_dump(fmap="", with_stats=False, dump_format="json") + xgb_model = {} + for i, m in enumerate(the_model): + xgb_model[i] = json.loads(m) + + + + g_table = {} + # feature_names = test_X.columns.T.tolist() + leaf_info ={} + leaf_info['max value'] = 0 + leaf_info['min value'] = 0 + for idx in xgb_model: + estimator = xgb_model[idx] + g_table, leaf_info = generate_table(estimator, idx, g_table, num_features, feature_names, feature_max, leaf_info) + + + + def votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info): + if tree_num == num_trees: + vote = np.zeros(num_classes).tolist() + for t in range(num_trees): + vote[t%num_classes] += leaf_info["tree "+str(t)][vote_list[t]] + # if vote.index(np.max(vote))== 0: + # if True : + g_table['votes to class'][num] = {} + for t in range(len(vote_list)): + g_table['votes to class'][num]['t'+str(t)+' vote'] = vote_list[t] + g_table['votes to class'][num]['class'] = vote.index(np.max(vote)) + num += 1 + return g_table, num + else: + for value in range(len(leaf_info["tree "+str(tree_num)])): + vote_list[tree_num] = value + tree_num += 1 + g_table, num = votes_to_class(tree_num, vote_list, num_trees, num_classes, g_table, num, leaf_info) + tree_num -= 1 + return g_table, num + + + ranges = 10 + g_table['votes to class'] = {} + print("\nGenerating vote to class table...",end="") + g_table, _ = votes_to_class(0, np.zeros(num_trees).tolist(), num_trees, num_classes, g_table, 0, leaf_info) + print('Done') + + feature_width = [] + for maxs in feature_max: + feature_width += [int(np.ceil(math.log(maxs, 2)) + 1)] + + + code_width_tree_feature = np.zeros((num_trees,num_features)) + for i in range(num_features): + for tree in range(num_trees): + # code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][feature_max[i]],2)) + code_width_tree_feature[tree, i] = np.ceil(math.log(g_table[tree]['feature ' + str(i)][np.max(list(g_table[tree]['feature ' + str(i)].keys()))]+1,2)+1) + # print(code_width_tree_feature[tree, i] , g_table[tree]['feature ' + str(i)][feature_max[i]]) + # print('stop') + + + Ternary_Table = {} + Ternary_Table['decision'] = g_table['votes to class'] + + for tree in range(num_trees): + Ternary_Table['tree ' + str(tree)] = g_table[tree]['code to vote'] + + for i in range(num_features): + Ternary_Table['feature '+str(i)] = {} + for value in range(feature_max[i]): + Ternary_Table['feature ' + str(i)][value] = [] + for tree in range(num_trees): + Ternary_Table['feature ' + str(i)][value] += [g_table[tree]["feature "+str(i)][value]] + Exact_Table = copy.deepcopy(Ternary_Table) + for i in range(num_features): + if i!=0: + print('') + print('Begine transfer: Feature table ' + str(i)) + Ternary_Table['feature '+str(i)]= Table_to_TCAM(Ternary_Table['feature '+str(i)], feature_width[i]) + + + # ===================== prepare default vote ========================= + print("\nPreparing default vote...", end="") + collect_votes = [] + for t in range(num_trees): + for idx in Exact_Table['tree ' + str(t)]: + collect_votes += [int(Exact_Table['tree ' + str(t)][idx]['leaf'])] + default_vote = max(collect_votes, key=collect_votes.count) + + code_table_size = 0 + for t in range(num_trees): + Ternary_Table['tree ' + str(t)] = {} + for idx in Exact_Table['tree ' + str(t)]: + if int(Exact_Table['tree ' + str(t)][idx]['leaf']) != default_vote: + Ternary_Table['tree ' + str(t)][code_table_size] = Exact_Table['tree ' + str(t)][idx] + code_table_size += 1 + Exact_Table['tree ' + str(t)] = copy.deepcopy(Ternary_Table['tree ' + str(t)]) + print('Done') + # ===================== prepare default class ========================= + print("Preparing default class...", end="") + collect_class = np.zeros(num_classes).tolist() + for idx in Exact_Table['decision']: + collect_class[Exact_Table['decision'][idx]['class']] += 1 + default_class = collect_class.index(max(collect_class)) + + code_table_size = 0 + Ternary_Table['decision'] = {} + for idx in Exact_Table['decision']: + if Exact_Table['decision'][idx]['class'] != default_class: + Ternary_Table['decision'][code_table_size] = Exact_Table['decision'][idx] + code_table_size += 1 + Exact_Table['decision'] = copy.deepcopy(Ternary_Table['decision']) + print('Done') + + # =================== convert model timer =================== + Planter_config['timer log']['convert model']['end'] = time.time() + # =================== convert model timer =================== + + json.dump(Ternary_Table, open('Tables/Ternary_Table.json', 'w'), indent=4) + print('Ternary_Table is generated') + json.dump(Exact_Table, open('Tables/Exact_Table.json', 'w'), indent=4) + print('Exact_Table is generated') + + + Planter_config['p4 config'] = {} + Planter_config['p4 config']["model"] = "XGB" + Planter_config['p4 config']["number of features"] = num_features + Planter_config['p4 config']["number of classes"] = num_classes + Planter_config['p4 config']["number of trees"] = num_trees + Planter_config['p4 config']['table name'] = 'Ternary_Table.json' + Planter_config['p4 config']["decision table size"] = len(Ternary_Table['decision'].keys())+1 + Planter_config['p4 config']["code table size"] = [] + for tree in range(num_trees): + Planter_config['p4 config']["code table size"] += [len(Ternary_Table['tree ' + str(tree)].keys())+1] + Planter_config['p4 config']["default vote"] = default_vote + Planter_config['p4 config']["default label"] = default_class + Planter_config['p4 config']["width of feature"] = feature_width + Planter_config['p4 config']["width of code"] = code_width_tree_feature + Planter_config['p4 config']["used columns"] = [] + for i in range(num_features): + Planter_config['p4 config']["used columns"] += [len(Ternary_Table['feature ' + str(i)].keys())+1] + Planter_config['p4 config']["width of probability"] = 7 + Planter_config['p4 config']["width of result"] = 8 + Planter_config['p4 config']["standard headers"] = ["ethernet", "Planter", "arp", "ipv4", "tcp", "udp", "vlan_tag"] + if 'test config' not in Planter_config.keys(): + Planter_config['test config'] = {} + Planter_config['test config']['type of test'] = 'classification' + + # dump the config file + dump_config(Planter_config, 'src/configs/Planter_config.json') + print(Planter_config['directory config']['work'] + '/src/configs/Planter_config.json is generated') + + # main() + return sklearn_y_predict.tolist() + + + +def test_tables(sklearn_test_y, test_X, test_y): + + + + config_file = 'src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + num_features = Planter_config['data config']['number of features'] + num_classes = Planter_config['model config']['number of classes'] + num_trees = Planter_config['model config']['number of classes']* int(int(Planter_config['model config']['number of trees'])/Planter_config['model config']['number of classes']) + num_depth = Planter_config['model config']['number of depth'] + max_leaf_nodes = Planter_config['model config']['max number of leaf nodes'] + Ternary_Table = json.load(open('Tables/Ternary_Table.json', 'r')) + Exact_Table = json.load(open('Tables/Exact_Table.json', 'r')) + + + print('Test the exact feature table, extact code and decision table (feel free if the acc to sklearn is slightly lower than 1)') + same = 0 + correct = 0 + error = 0 + switch_test_y = [] + for i in range(np.shape(test_X.values)[0]): + vote_list = np.zeros(num_trees).astype(dtype=int).tolist() + for tree in range(num_trees): + code_list = np.zeros(num_features) + ternary_code_list = np.zeros(num_features) + input_feature_value = test_X.values[i] + + for f in range(num_features): + match_or_not = False + + # matcg ternary + TCAM_table = Ternary_Table['feature ' + str(f)] + keys = list(TCAM_table.keys()) + + for count in keys: + + if input_feature_value[f] & TCAM_table[count][0] == TCAM_table[count][0] & TCAM_table[count][1]: + ternary_code_list[f] = TCAM_table[count][2][tree] + match_or_not = True + break + + if not match_or_not: + print('feature table not matched') + # matcg exact + code_list[f] = Exact_Table['feature ' + str(f)][str(input_feature_value[f])][tree] + if not match_or_not: + print('feature table not matched') + if str(code_list)!=str(ternary_code_list): + print('error in exact to ternary match', code_list,ternary_code_list) + for key in Exact_Table["tree " + str(tree)]: + + match_or_not = False + all_True = True + for code_f in range(num_features): + if not Exact_Table["tree " + str(tree)][key]['f' + str(code_f) + ' code'] == code_list[code_f]: + all_True = False + break + if all_True: + vote_list[tree] = int(Exact_Table["tree " + str(tree)][key]['leaf']) + match_or_not = True + break + if not match_or_not: + vote_list[tree] = Planter_config['p4 config']["default vote"] + + + for key in Exact_Table['decision']: + match_or_not = False + all_True = True + for tree_v in range(num_trees): + if not Exact_Table["decision"][key]['t' + str(tree_v) + ' vote'] == vote_list[tree_v]: + all_True = False + break + if all_True: + switch_prediction = Exact_Table['decision'][key]['class'] + match_or_not = True + break + if not match_or_not: + switch_prediction = Planter_config['p4 config']["default label"] + + + switch_test_y += [switch_prediction] + if switch_prediction == test_y[i]: + correct += 1 + + if switch_prediction == sklearn_test_y[i]: + same += 1 + else: + error += 1 + + if i % 1 == 0 and i!=0: + print( + '\rswitch_prediction: {}, test_y: {}, with acc: {:.3}, with acc to sklearn: {:.4}, with error: {:.3}, M/A format macro f1: {:.3}, macro f1: {:.3}'.format( + switch_prediction, test_y[i], correct / (i + 1), same / (i + 1), error / (i + 1), + accuracy_score(switch_test_y[:i], test_y[:i] ),accuracy_score(sklearn_test_y[:i], test_y[:i] )), end="") + + + print('\nThe accuracy of the match action format of XGBoost is', correct / np.shape(test_X.values)[0]) + result = classification_report(switch_test_y, test_y, digits=4) + print('\n', result) + + +def resource_prediction(): + + config_file = './src/configs/Planter_config.json' + Planter_config = json.load(open(config_file, 'r')) + + print('Exact match entries: ',np.sum(Planter_config['p4 config']["code table size"])+ Planter_config['p4 config']["decision table size"] ) + print('Ternary match entries: ', np.sum(Planter_config['p4 config']["used columns"])) + + diff --git a/src/models/XGB/readme.md b/src/models/XGB/readme.md index 0231bf0..bb382ae 100644 --- a/src/models/XGB/readme.md +++ b/src/models/XGB/readme.md @@ -1 +1 @@ -This folder contains part of the variations for Planter-supported XGB. Please refer to ```./Docs/Planter_User_Document.pdf```for further information. +This folder contains part of the variations for Planter-supported XGB. Please refer to ```./Docs/Planter_User_Document.pdf```for further information.