From 59e5fe77bdb0e61f9362faea78bb0f2584e9151d Mon Sep 17 00:00:00 2001 From: zhangjian <1032674385@qq.com> Date: Wed, 12 Jun 2024 09:54:00 +0800 Subject: [PATCH 1/5] add white list in run_ut.py to skip the empty operator --- Acc/run_ut/run_dualback_ut.py | 4 ++++ Acc/run_ut/run_ut.py | 26 +++++++++++++++----------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/Acc/run_ut/run_dualback_ut.py b/Acc/run_ut/run_dualback_ut.py index 47a9e54..537bff7 100644 --- a/Acc/run_ut/run_dualback_ut.py +++ b/Acc/run_ut/run_dualback_ut.py @@ -16,6 +16,7 @@ from file_check_util import FileCheckConst, FileChecker, check_link, change_mode, check_file_suffix seed_all() current_time = time.strftime("%Y%m%d%H%M%S") +white_list = ['empty'] # 算子白名单 tqdm_params = { 'smoothing': 0, # 平滑进度条的预计剩余时间,取值范围0到1 @@ -92,6 +93,9 @@ def run_ut_save(forward_content,real_data_path,out_path,backend): print_info_log("start UT save") for i, (api_full_name, api_info_dict) in enumerate(tqdm(forward_content.items(), **tqdm_params)): try: + [_, api_name, _] = api_full_name.split(Const.DELIMITER) + if api_name in set(white_list): + continue print(api_full_name) run_paddle_api_save(api_full_name, real_data_path, api_info_dict, out_path, backend) print("*"*100) diff --git a/Acc/run_ut/run_ut.py b/Acc/run_ut/run_ut.py index 55be858..5a040c8 100644 --- a/Acc/run_ut/run_ut.py +++ b/Acc/run_ut/run_ut.py @@ -14,7 +14,7 @@ from data_generate import gen_api_params, gen_args from run_ut_utils import hf_32_standard_api, Backward_Message from file_check_util import FileOpen, FileCheckConst, FileChecker, check_link, change_mode, check_file_suffix -# from compare.compare import Comparator +from compare.compare import Comparator seed_all() not_raise_dtype_set = {'type_as'} @@ -25,6 +25,7 @@ DETAILS_FILE_NAME = f"accuracy_checking_details_" + current_time + ".csv" RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', 'save_error_data', 
'is_continue_run_ut', 'real_data_path']) +white_list = ['empty'] # 算子白名单 tqdm_params = { 'smoothing': 0, # 平滑进度条的预计剩余时间,取值范围0到1 @@ -135,20 +136,23 @@ def run_ut(config): print_info_log("start UT test") print_info_log(f"UT task result will be saved in {config.result_csv_path}") print_info_log(f"UT task details will be saved in {config.details_csv_path}") - # compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut) - # with FileOpen(config.result_csv_path, 'r') as file: - # csv_reader = csv.reader(file) - # next(csv_reader) - # api_name_set = {row[0] for row in csv_reader} + compare = Comparator(config.result_csv_path, config.details_csv_path, config.is_continue_run_ut) + with FileOpen(config.result_csv_path, 'r') as file: + csv_reader = csv.reader(file) + next(csv_reader) + api_name_set = {row[0] for row in csv_reader} for i, (api_full_name, api_info_dict) in enumerate(tqdm(config.forward_content.items(), **tqdm_params)): try: + [_, api_name, _] = api_full_name.split(Const.DELIMITER) + if api_name in set(white_list): + continue print(api_full_name) data_info = run_paddle_api(api_full_name, config.real_data_path, api_info_dict) - # is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, - # data_info.bench_output, - # data_info.device_output, - # data_info.bench_grad, - # data_info.device_grad) + is_fwd_success, is_bwd_success = compare.compare_output(api_full_name, + data_info.bench_output, + data_info.device_output, + data_info.bench_grad, + data_info.device_grad) except Exception as err: [_, api_name, _] = api_full_name.split("*") if "expected scalar type Long" in str(err): From 754c06215adba8b2eb3a081f703ff4cf20a05719 Mon Sep 17 00:00:00 2001 From: zhangjian <1032674385@qq.com> Date: Thu, 13 Jun 2024 14:22:47 +0800 Subject: [PATCH 2/5] update the compare method. 
add ULP compare standard for multiplication operator (include linear) --- Acc/api_precision_compare.py | 274 +++++++++--- Acc/compare/algorithm.py | 18 +- Acc/compare/api_precision_standard.yaml | 24 + Acc/compare/api_precision_threshold.yaml | 540 +++++++++++------------ Acc/compare/compare.py | 32 +- Acc/compare/compare_column.py | 18 +- Acc/compare/compare_utils.py | 51 ++- 7 files changed, 622 insertions(+), 335 deletions(-) diff --git a/Acc/api_precision_compare.py b/Acc/api_precision_compare.py index b944bd6..4521920 100644 --- a/Acc/api_precision_compare.py +++ b/Acc/api_precision_compare.py @@ -4,12 +4,13 @@ import csv import math from collections import namedtuple +import paddle import pandas as pd from compare.compare_utils import CompareConst, API_PRECISION_COMPARE_RESULT_FILE_NAME, \ API_PRECISION_COMPARE_DETAILS_FILE_NAME, BENCHMARK_COMPARE_SUPPORT_LIST, API_PRECISION_COMPARE_UNSUPPORT_LIST, \ - ApiPrecisionCompareColumn, AbsoluteStandardApi, BinaryStandardApi, BINARY_COMPARE_UNSUPPORT_LIST, \ - convert_str_to_float, CompareMessage + ApiPrecisionCompareColumn, AbsoluteStandardApi, BinaryStandardApi, ULPStandardApi, ThousandthStandardApi, \ + BINARY_COMPARE_UNSUPPORT_LIST, ULP_COMPARE_SUPPORT_LIST, convert_str_to_float, CompareMessage, is_inf_or_nan from compare.compare_column import ApiPrecisionOutputColumn from compare.compare_dependency import get_validated_result_csv_path @@ -66,7 +67,53 @@ } -class BenchmarkStandard: +class Standard: + @staticmethod + def _calc_ratio(column_name, x, y, default_value): + ''' + 计算npu侧和gpu侧统计量的比值 + 输入: + column_name:统计量名称 + x:npu侧统计量 + y:gpu侧统计量 + default:当x不接近0,y接近0,设置的比值默认值 + 输出: + ratio:统计量x和y的比值 + inf_nan_consistency:不出现inf或nan时为True,出现inf或nan时必须同时为inf或-inf或nan才为True,否则为False + message:当出现inf或nan时的提示信息 + ''' + x, y = convert_str_to_float(x), convert_str_to_float(y) + if is_inf_or_nan(x) or is_inf_or_nan(y): + if math.isnan(x) or math.isnan(y): + if math.isnan(x) and math.isnan(y): + return float("nan"), True, 
f"{column_name}同为同号inf或nan\n" + else: + return float("nan"), False, f"{column_name}inf或nan不一致\n" + else: + if math.isinf(x) and math.isinf(y): + if x == y: + return float("nan"), True, f"{column_name}同为同号inf或nan\n" + else: + return float("nan"), False, f"{column_name}inf或nan不一致\n" + elif math.isinf(x): + if y >= 0: + return x, False, f"{column_name}inf或nan不一致\n" + else: + return -x, False, f"{column_name}inf或nan不一致\n" + else: + return abs(x / y), False, f"{column_name}inf或nan不一致\n" + inf_nan_consistency = True + message = "" + if math.isclose(y, 0.0): + if math.isclose(x, 0.0): + return 1.0, inf_nan_consistency, message + else: + return default_value, inf_nan_consistency, message + else: + return abs(x / y), inf_nan_consistency, message + + +class BenchmarkStandard(Standard): def __init__(self, api_name, npu_precision, gpu_precision): self.api_name = api_name self.npu_precision = npu_precision @@ -83,38 +130,65 @@ def __init__(self, api_name, npu_precision, gpu_precision): self.eb_status = CompareConst.PASS self.check_result_list = [] self.final_result = CompareConst.PASS + self.compare_message = "" def __str__(self): - return "%s" % (self.api_name) + return f"{self.api_name}" def get_result(self): - self._compare_ratio() - self.small_value_err_status = self._get_status(self.small_value_err_ratio, 'small_value') + small_value_inf_nan_consistency, rmse_inf_nan_consistency, \ + max_rel_inf_nan_consistency, mean_rel_inf_nan_consistency, eb_inf_nan_consistency = self._compare_ratio() + if small_value_inf_nan_consistency: + self.small_value_err_status = self._get_status(self.small_value_err_ratio, 'small_value') + else: + self.small_value_err_status = CompareConst.ERROR self.check_result_list.append(self.small_value_err_status) - self.rmse_status = self._get_status(self.rmse_ratio, 'rmse') + if rmse_inf_nan_consistency: + self.rmse_status = self._get_status(self.rmse_ratio, 'rmse') + else: + self.rmse_status = CompareConst.ERROR 
self.check_result_list.append(self.rmse_status) - self.max_rel_err_status = self._get_status(self.max_rel_err_ratio, 'max_rel_err') + if max_rel_inf_nan_consistency: + self.max_rel_err_status = self._get_status(self.max_rel_err_ratio, 'max_rel_err') + else: + self.max_rel_err_status = CompareConst.ERROR self.check_result_list.append(self.max_rel_err_status) - self.mean_rel_err_status = self._get_status(self.mean_rel_err_ratio, 'mean_rel_err') + if mean_rel_inf_nan_consistency: + self.mean_rel_err_status = self._get_status(self.mean_rel_err_ratio, 'mean_rel_err') + else: + self.mean_rel_err_status = CompareConst.ERROR self.check_result_list.append(self.mean_rel_err_status) - self.eb_status = self._get_status(self.eb_ratio, 'eb') + if eb_inf_nan_consistency: + self.eb_status = self._get_status(self.eb_ratio, 'eb') + else: + self.eb_status = CompareConst.ERROR if CompareConst.ERROR in self.check_result_list: self.final_result = CompareConst.ERROR elif CompareConst.WARNING in self.check_result_list: self.final_result = CompareConst.WARNING def _compare_ratio(self): - self.small_value_err_ratio = self._calc_ratio( + self.small_value_err_ratio, small_value_inf_nan_consistency, small_value_message = self._calc_ratio(ApiPrecisionCompareColumn.SMALL_VALUE_ERROR_RATE, self.npu_precision.get(ApiPrecisionCompareColumn.SMALL_VALUE_ERROR_RATE), self.gpu_precision.get(ApiPrecisionCompareColumn.SMALL_VALUE_ERROR_RATE), 10000.0) - self.rmse_ratio = self._calc_ratio(self.npu_precision.get(ApiPrecisionCompareColumn.RMSE), - self.gpu_precision.get(ApiPrecisionCompareColumn.RMSE), 10000.0) - self.max_rel_err_ratio = self._calc_ratio(self.npu_precision.get(ApiPrecisionCompareColumn.MAX_REL_ERR), - self.gpu_precision.get(ApiPrecisionCompareColumn.MAX_REL_ERR), 10000.0) - self.mean_rel_err_ratio = self._calc_ratio(self.npu_precision.get(ApiPrecisionCompareColumn.MEAN_REL_ERR), - self.gpu_precision.get(ApiPrecisionCompareColumn.MEAN_REL_ERR), 10000.0) - self.eb_ratio = 
self._calc_ratio(self.npu_precision.get(ApiPrecisionCompareColumn.EB), - self.gpu_precision.get(ApiPrecisionCompareColumn.EB), 10000.0) + self.compare_message += small_value_message + self.rmse_ratio, rmse_inf_nan_consistency, rmse_message = self._calc_ratio(ApiPrecisionCompareColumn.RMSE, + self.npu_precision.get(ApiPrecisionCompareColumn.RMSE), + self.gpu_precision.get(ApiPrecisionCompareColumn.RMSE), 10000.0) + self.compare_message += rmse_message + self.max_rel_err_ratio, max_rel_inf_nan_consistency, max_rel_message = self._calc_ratio(ApiPrecisionCompareColumn.MAX_REL_ERR, + self.npu_precision.get(ApiPrecisionCompareColumn.MAX_REL_ERR), + self.gpu_precision.get(ApiPrecisionCompareColumn.MAX_REL_ERR), 10000.0) + self.compare_message += max_rel_message + self.mean_rel_err_ratio, mean_rel_inf_nan_consistency, mean_rel_message = self._calc_ratio(ApiPrecisionCompareColumn.MEAN_REL_ERR, + self.npu_precision.get(ApiPrecisionCompareColumn.MEAN_REL_ERR), + self.gpu_precision.get(ApiPrecisionCompareColumn.MEAN_REL_ERR), 10000.0) + self.compare_message += mean_rel_message + self.eb_ratio, eb_inf_nan_consistency, eb_message = self._calc_ratio(ApiPrecisionCompareColumn.EB, + self.npu_precision.get(ApiPrecisionCompareColumn.EB), + self.gpu_precision.get(ApiPrecisionCompareColumn.EB), 10000.0) + self.compare_message += eb_message + return small_value_inf_nan_consistency, rmse_inf_nan_consistency, max_rel_inf_nan_consistency, mean_rel_inf_nan_consistency, eb_inf_nan_consistency def to_column_value(self): return [self.small_value_err_ratio, self.small_value_err_status, self.rmse_ratio, @@ -123,6 +197,8 @@ def to_column_value(self): @staticmethod def _get_status(ratio, algorithm): + if math.isnan(ratio) or math.isinf(ratio): + return CompareConst.PASS error_threshold = benchmark_algorithms_thresholds.get(algorithm).get('error_threshold') warning_threshold = benchmark_algorithms_thresholds.get(algorithm).get('warning_threshold') if ratio > error_threshold: @@ -131,13 +207,70 @@ 
def _get_status(ratio, algorithm): return CompareConst.WARNING return CompareConst.PASS - @staticmethod - def _calc_ratio(x, y, default_value=1.0): - x, y = convert_str_to_float(x), convert_str_to_float(y) - if math.isclose(y, 0.0): - return 1.0 if math.isclose(x, 0.0) else default_value + +class ULPStandard(Standard): + def __init__(self, api_name, npu_precision, gpu_precision): + self.api_name = api_name + self.npu_precision = npu_precision + self.gpu_precision = gpu_precision + self.mean_ulp_err = 0 + self.ulp_err_proportion = 0 + self.ulp_err_proportion_ratio = 1 + self.ulp_err_status = CompareConst.PASS + self.compare_message = "" + + def __str__(self): + return f"{self.api_name}" + + def get_result(self): + self.mean_ulp_err = convert_str_to_float(self.npu_precision.get(ApiPrecisionCompareColumn.MEAN_ULP_ERR)) + gpu_mean_ulp_err = convert_str_to_float(self.gpu_precision.get(ApiPrecisionCompareColumn.MEAN_ULP_ERR)) + inf_nan_consistency = True + if is_inf_or_nan(self.mean_ulp_err) or is_inf_or_nan(gpu_mean_ulp_err): + if math.isnan(self.mean_ulp_err) or math.isnan(gpu_mean_ulp_err): + if math.isnan(self.mean_ulp_err) and math.isnan(gpu_mean_ulp_err): + inf_nan_consistency = True + self.compare_message += f"{ApiPrecisionCompareColumn.MEAN_ULP_ERR}同为同号inf或nan\n" + else: + inf_nan_consistency = False + self.compare_message += f"{ApiPrecisionCompareColumn.MEAN_ULP_ERR}inf或nan不一致\n" + else: + if self.mean_ulp_err == gpu_mean_ulp_err: + inf_nan_consistency = True + self.compare_message += f"{ApiPrecisionCompareColumn.MEAN_ULP_ERR}同为同号inf或nan\n" + else: + inf_nan_consistency = False + self.compare_message += f"{ApiPrecisionCompareColumn.MEAN_ULP_ERR}inf或nan不一致\n" + self.ulp_err_proportion = convert_str_to_float(self.npu_precision.get(ApiPrecisionCompareColumn.ULP_ERR_PROPORTION)) + self.ulp_err_proportion_ratio, ulp_inf_nan_consistency, message = self._calc_ratio(ApiPrecisionCompareColumn.ULP_ERR_PROPORTION, + 
self.npu_precision.get(ApiPrecisionCompareColumn.ULP_ERR_PROPORTION), + self.gpu_precision.get(ApiPrecisionCompareColumn.ULP_ERR_PROPORTION), 10000.0) + inf_nan_consistency = inf_nan_consistency and ulp_inf_nan_consistency + self.compare_message += message + if inf_nan_consistency: + self.ulp_err_status = self.get_ulp_status(self.npu_precision.get(ApiPrecisionCompareColumn.DEVICE_DTYPE)) + else: + self.ulp_err_status = CompareConst.ERROR + + def get_ulp_status(self, dtype): + if dtype == paddle.float32: + if self.mean_ulp_err < 64: + return CompareConst.PASS + elif self.ulp_err_proportion < 0.05: + return CompareConst.PASS + elif self.ulp_err_proportion_ratio < 1: + return CompareConst.PASS + else: + self.compare_message += "ERROR: ULP误差不满足标准\n" + return CompareConst.ERROR else: - return abs(x / y) + if self.ulp_err_proportion < 0.001: + return CompareConst.PASS + elif self.ulp_err_proportion_ratio < 1: + return CompareConst.PASS + else: + self.compare_message += "ERROR: ULP误差不满足标准\n" + return CompareConst.ERROR def write_detail_csv(content, save_path): @@ -176,13 +309,13 @@ def api_precision_compare(config): def analyse_csv(npu_data, gpu_data, config): forward_status, backward_status = [], [] - full_last_api_name, last_api_dtype = None, None + last_api_name, last_api_dtype = None, None for _, row_npu in npu_data.iterrows(): message = '' compare_column = ApiPrecisionOutputColumn() full_api_name_with_direction_status = row_npu[ApiPrecisionCompareColumn.API_NAME] row_gpu = gpu_data[gpu_data[ApiPrecisionCompareColumn.API_NAME] == full_api_name_with_direction_status] - full_api_name, direction_status, _, _ = full_api_name_with_direction_status.split(".") + _, api_name, _, direction_status, _, _ = full_api_name_with_direction_status.split(".") if row_gpu.empty: print_warn_log(f'This API : {full_api_name_with_direction_status} does not exist in the GPU data.') continue @@ -190,38 +323,46 @@ def analyse_csv(npu_data, gpu_data, config): msg = f'This API : 
{full_api_name_with_direction_status} has multiple records in the GPU data.' raise CompareException(CompareException.INVALID_DATA_ERROR, msg) row_gpu = row_gpu.iloc[0] + new_status = CompareConst.SPACE #当前API的输出为空(例如反向过程中requires_grad=False),跳过比对 if row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE].isspace(): - continue - _, api_name, _ = full_api_name.split("*") - new_status = CompareConst.SPACE - compare_column.api_name = full_api_name_with_direction_status - if row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] not in BINARY_COMPARE_UNSUPPORT_LIST or api_name in BinaryStandardApi: - new_status = record_binary_consistency_result(api_name, compare_column, row_npu) - elif api_name in AbsoluteStandardApi: - new_status = record_absolute_threshold_result(compare_column, row_npu) - elif row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] in BENCHMARK_COMPARE_SUPPORT_LIST: - bs = BenchmarkStandard(full_api_name_with_direction_status, row_npu, row_gpu) - new_status = record_benchmark_compare_result(compare_column, bs) - write_detail_csv(compare_column.to_column_value(), config.details_csv_path) - - if full_last_api_name is not None and full_api_name != full_last_api_name: + compare_column.api_name = full_api_name_with_direction_status + compare_column.compare_result = CompareConst.SKIP + compare_column.compare_message = row_npu[ApiPrecisionCompareColumn.MESSAGE] + new_status = CompareConst.SKIP + write_detail_csv(compare_column.to_column_value(), config.details_csv_path) + else: + compare_column.api_name = full_api_name_with_direction_status + if api_name in ThousandthStandardApi: + new_status = record_thousandth_threshold_result(compare_column, row_npu) + elif row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] not in BINARY_COMPARE_UNSUPPORT_LIST or api_name in BinaryStandardApi: + new_status = record_binary_consistency_result(api_name, compare_column, row_npu) + elif api_name in AbsoluteStandardApi: + new_status = record_absolute_threshold_result(compare_column, row_npu) + elif 
api_name in ULPStandardApi and row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] in ULP_COMPARE_SUPPORT_LIST: + us = ULPStandard(full_api_name_with_direction_status, row_npu, row_gpu) + new_status = record_ulp_compare_result(compare_column, us) + elif row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] in BENCHMARK_COMPARE_SUPPORT_LIST: + bs = BenchmarkStandard(full_api_name_with_direction_status, row_npu, row_gpu) + new_status = record_benchmark_compare_result(compare_column, bs) + write_detail_csv(compare_column.to_column_value(), config.details_csv_path) + + if last_api_name is not None and api_name != last_api_name: if last_api_dtype in API_PRECISION_COMPARE_UNSUPPORT_LIST: message = unsupported_message - write_csv([[full_last_api_name, "skip", "skip", message]], config.result_csv_path) + write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) forward_status, backward_status = [], [] message = '' else: forward_result = get_api_checker_result(forward_status) backward_result = get_api_checker_result(backward_status) - _, last_api_name, _ = full_last_api_name.split("*") message += CompareMessage.get(last_api_name, "") if forward_result == CompareConst.ERROR else "" - write_csv([[full_last_api_name, forward_result, backward_result, message]], config.result_csv_path) + write_csv([[last_api_name, forward_result, backward_result, message]], config.result_csv_path) forward_status, backward_status = [], [] message = '' is_supported = row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] not in API_PRECISION_COMPARE_UNSUPPORT_LIST - full_last_api_name = full_api_name + last_api_name = api_name last_api_dtype = row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE] if not is_supported: @@ -234,16 +375,15 @@ def analyse_csv(npu_data, gpu_data, config): else: print_error_log(f"Invalid direction status: {direction_status}") - if full_last_api_name is not None: + if last_api_name is not None: if last_api_dtype in API_PRECISION_COMPARE_UNSUPPORT_LIST: message = 
unsupported_message - write_csv([[full_last_api_name, "skip", "skip", message]], config.result_csv_path) + write_csv([[last_api_name, "skip", "skip", message]], config.result_csv_path) else: forward_result = get_api_checker_result(forward_status) backward_result = get_api_checker_result(backward_status) - _, last_api_name, _ = full_last_api_name.split("*") message += CompareMessage.get(last_api_name, "") if forward_result == CompareConst.ERROR else "" - write_csv([[full_last_api_name, forward_result, backward_result, message]], config.result_csv_path) + write_csv([[last_api_name, forward_result, backward_result, message]], config.result_csv_path) def check_error_rate(npu_error_rate): @@ -278,6 +418,8 @@ def get_absolute_threshold_result(row_npu): def get_api_checker_result(status): if not status: return CompareConst.SPACE + if all(item == CompareConst.SKIP for item in status): + return CompareConst.SKIP for const in (CompareConst.ERROR, CompareConst.WARNING): if const in status: return const @@ -288,7 +430,7 @@ def check_csv_columns(columns, csv_type): required_columns = ApiPrecisionCompareColumn.to_required_columns() missing_columns = [column for column in required_columns if column not in columns] if missing_columns: - msg = f"The followint columns {','.join(missing_columns)} are missing in{csv_type}" + msg = f"The following columns {','.join(missing_columns)} are missing in{csv_type}" raise CompareException(CompareException.INVALID_DATA_ERROR, msg) @@ -341,11 +483,39 @@ def record_benchmark_compare_result(compare_column, bs): compare_column.eb_status = bs.eb_status compare_column.compare_result = bs.final_result compare_column.compare_algorithm = "标杆比对法" - message = '' + compare_column.compare_message = bs.compare_message for status_attr, messages in benchmark_message.items(): status_value = getattr(compare_column, status_attr) if status_value in messages: - message += messages[status_value] + compare_column.compare_message += messages[status_value] + return 
compare_column.compare_result + + +def record_ulp_compare_result(compare_column, us): + us.get_result() + compare_column.mean_ulp_err = us.mean_ulp_err + compare_column.ulp_err_proportion = us.ulp_err_proportion + compare_column.ulp_err_proportion_ratio = us.ulp_err_proportion_ratio + compare_column.ulp_err_status = us.ulp_err_status + compare_column.compare_result = us.ulp_err_status + compare_column.compare_algorithm = "ULP误差比对法" + compare_column.compare_message = us.compare_message + return compare_column.compare_result + + +def check_thousandth_rate(thousandth_rate): + return CompareConst.PASS if convert_str_to_float(thousandth_rate) >= 0.999 else CompareConst.ERROR + + +def record_thousandth_threshold_result(compare_column, row_npu): + new_status = check_thousandth_rate(row_npu[ApiPrecisionCompareColumn.REL_ERR_THOUSANDTH]) + compare_column.rel_err_thousandth = row_npu[ApiPrecisionCompareColumn.REL_ERR_THOUSANDTH] + compare_column.rel_err_thousandth_status = new_status + compare_column.compare_result = new_status + compare_column.compare_algorithm = "双千指标法" + message = '' + if compare_column.rel_err_thousandth_status == CompareConst.ERROR: + message += "ERROR: 双千指标不达标\n" compare_column.compare_message = message return compare_column.compare_result diff --git a/Acc/compare/algorithm.py b/Acc/compare/algorithm.py index 883740e..e2c7ebb 100644 --- a/Acc/compare/algorithm.py +++ b/Acc/compare/algorithm.py @@ -1,7 +1,7 @@ # 定义比对算法及比对标准 import paddle import numpy as np -from compare.compare_utils import CompareConst, check_dtype_comparable +from compare.compare_utils import CompareConst, check_dtype_comparable, ULP_PARAMETERS #cos @@ -188,3 +188,19 @@ def check_norm_value(normal_value_mask, rel_err, rtol): err_mask = np.logical_and(err_mask, normal_value_mask) err_cnt = np.sum(err_mask) return 0 if np.sum(normal_value_mask) == 0 else err_cnt / np.sum(normal_value_mask) + + +def get_ulp_err(bench_output, device_output, dtype): + parameters = ULP_PARAMETERS.get(dtype) 
+ min_eb = (parameters.get('min_eb'))[0] + exponent_num = (parameters.get('exponent_num'))[0] + abs_bench = np.abs(bench_output) + eb = np.where(abs_bench == 0, 0, np.floor(np.log2(abs_bench))) + eb = np.maximum(eb, min_eb) + + if dtype == paddle.float32: + ulp_err = (device_output.astype(np.float64) - bench_output).astype(np.float64) * np.exp2(-eb + exponent_num).astype(np.float64) + else: + ulp_err = (device_output.astype(np.float32) - bench_output).astype(np.float32) * np.exp2(-eb + exponent_num).astype(np.float32) + ulp_err = np.abs(ulp_err) + return ulp_err diff --git a/Acc/compare/api_precision_standard.yaml b/Acc/compare/api_precision_standard.yaml index 3dafc52..9641ea1 100644 --- a/Acc/compare/api_precision_standard.yaml +++ b/Acc/compare/api_precision_standard.yaml @@ -105,3 +105,27 @@ BinaryCompareStandard: - tril_ - triu - triu_ + - type_as + +ULPStandard: + - __matmul__ + - addbmm + - addbmm_ + - addmm + - addmm_ + - baddbmm + - baddbmm_ + - bilinear + - bmm + - chain_matmul + - hspmm + - linear + - matmul + - mm + - mv + - smm + - sspaddmm + +ThousandthStandard: + - conv1d + - conv2d diff --git a/Acc/compare/api_precision_threshold.yaml b/Acc/compare/api_precision_threshold.yaml index 74c9692..d04b7c8 100644 --- a/Acc/compare/api_precision_threshold.yaml +++ b/Acc/compare/api_precision_threshold.yaml @@ -1,390 +1,390 @@ mul: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 mul_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 
0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __mul__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __imul__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __rmul__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 add: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 
1.0e-5 add_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __add__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __iadd__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __radd__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 div: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 
0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 div_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __div__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __idiv__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 divide: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 divide_: 
paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 leaky_relu: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 leaky_relu_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 prelu: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 reciprocal: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 
0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 reciprocal_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 rsqrt: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 rsqrt_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 square: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 square_: 
paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 sub: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 sub_: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 rsub: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __isub__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 
1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 __sub__: paddle.float32: - rtol: 0.000001 - small_value: 0.000001 - small_value_atol: 0.000001 + rtol: 1.0e-6 + small_value: 1.0e-6 + small_value_atol: 1.0e-9 paddle.float16: - rtol: 0.001 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 1.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 paddle.bfloat16: - rtol: 0.004 - small_value: 0.001 - small_value_atol: 0.001 + rtol: 4.0e-3 + small_value: 1.0e-3 + small_value_atol: 1.0e-5 diff --git a/Acc/compare/compare.py b/Acc/compare/compare.py index f4e1ad4..7be6e45 100644 --- a/Acc/compare/compare.py +++ b/Acc/compare/compare.py @@ -6,13 +6,14 @@ from rich.table import Table from rich.console import Console -from compare.compare_utils import CompareConst, check_dtype_comparable, DETAIL_TEST_ROWS, \ - precision_configs, BENCHMARK_COMPARE_SUPPORT_LIST, AbsoluteStandardApi, BinaryStandardApi, apis_threshold +from compare.compare_utils import (CompareConst, check_dtype_comparable, DETAIL_TEST_ROWS, \ + precision_configs, BENCHMARK_COMPARE_SUPPORT_LIST, AbsoluteStandardApi, BinaryStandardApi, ThousandthStandardApi, \ + ULPStandardApi, apis_threshold) from compare.compare_column import CompareColumn from compare.algorithm import get_rmse, get_error_balance, get_max_rel_err, get_mean_rel_err, \ get_rel_err, get_abs_err, get_max_abs_err, get_rel_err_ratio, cosine_sim, get_rel_err_origin, \ get_small_value_err_ratio, get_finite_and_infinite_mask, get_small_value_mask, check_inf_nan_value, \ - check_small_value, check_norm_value, get_abs_bench_with_eps + check_small_value, check_norm_value, get_abs_bench_with_eps, get_ulp_err from compare.compare_dependency import get_json_contents, write_csv, print_warn_log from compare.compare_dependency import FileOpen @@ -40,8 +41,9 @@ def __init__(self, result_csv_path, details_csv_path, 
is_continue_run_ut, stack_ self.stack_info = None self.test_result_cnt = { - "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, "success_num": 0, - "total_num": 0, "forward_or_backward_fail_num": 0 + "success_num": 0, "warning_num": 0, "error_num": 0, + "forward_fail_num": 0, "backward_fail_num": 0, "forward_and_backward_fail_num": 0, + "total_num": 0, "total_skip_num": 0 } def print_pretest_result(self): @@ -274,6 +276,10 @@ def _compare_float_tensor(self, api_name, bench_output, device_output, compare_c message = "" abs_bench, abs_bench_with_eps = get_abs_bench_with_eps(bench_output, dtype) abs_err = get_abs_err(bench_output, device_output) + rel_err_orign = get_rel_err_origin(abs_err, abs_bench_with_eps) + if api_name in ThousandthStandardApi: + thousand_res, thousand_status = get_rel_err_ratio(rel_err_orign, 0.001) + compare_column.rel_err_thousandth = thousand_res if str(dtype) in BENCHMARK_COMPARE_SUPPORT_LIST: both_finite_mask, inf_nan_mask = get_finite_and_infinite_mask(bench_output, device_output) if api_name in BinaryStandardApi: @@ -288,6 +294,19 @@ def _compare_float_tensor(self, api_name, bench_output, device_output, compare_c compare_column.inf_nan_error_ratio = check_inf_nan_value(inf_nan_mask, bench_output, device_output, dtype, rtol) compare_column.rel_err_ratio = check_norm_value(normal_value_mask, rel_err, rtol) compare_column.abs_err_ratio = check_small_value(abs_err, small_value_mask, small_value_atol) + elif api_name in ULPStandardApi: + if bench_output.size == 0: + compare_column.max_ulp_error = 0 + compare_column.mean_ulp_error = 0 + compare_column.ulp_error_proportion = 0 + else: + ulp_err = get_ulp_err(bench_output, device_output, dtype) + compare_column.max_ulp_error = np.max(ulp_err) + compare_column.mean_ulp_error = np.mean(ulp_err) + if dtype == paddle.float32: + compare_column.ulp_error_proportion = np.sum(ulp_err > 32) / bench_output.size + else: + compare_column.ulp_error_proportion = np.sum(ulp_err > 
1) / bench_output.size else: dtype_config = precision_configs.get(dtype) small_value_mask = get_small_value_mask(abs_bench, both_finite_mask, dtype_config['small_value'][0]) @@ -296,6 +315,8 @@ def _compare_float_tensor(self, api_name, bench_output, device_output, compare_c rel_err = get_rel_err(abs_err, abs_bench_with_eps, small_value_mask, inf_nan_mask) compare_column.RMSE = get_rmse(abs_err, np.logical_or(inf_nan_mask, small_value_mask)) compare_column.EB = get_error_balance(bench_output, device_output) + if rel_err.size == 0: + return CompareConst.ERROR, compare_column, "Relative error result list is empty." compare_column.Max_rel_error = get_max_rel_err(rel_err) compare_column.Mean_rel_error = get_mean_rel_err(rel_err) @@ -312,7 +333,6 @@ def _compare_float_tensor(self, api_name, bench_output, device_output, compare_c message += "Max abs error is less than 0.001, consider as pass, skip other check and set to SPACE.\n" return CompareConst.PASS, compare_column, message - rel_err_orign = get_rel_err_origin(abs_err, abs_bench_with_eps) if dtype in [paddle.float16, paddle.bfloat16]: hundred_res, hundred_status = get_rel_err_ratio(rel_err_orign, 0.01) compare_column.rel_err_hundredth = hundred_res diff --git a/Acc/compare/compare_column.py b/Acc/compare/compare_column.py index 3513179..65d6036 100644 --- a/Acc/compare/compare_column.py +++ b/Acc/compare/compare_column.py @@ -20,12 +20,16 @@ def __init__(self): self.inf_nan_error_ratio = CompareConst.SPACE self.rel_err_ratio = CompareConst.SPACE self.abs_err_ratio = CompareConst.SPACE + self.max_ulp_error = CompareConst.SPACE + self.mean_ulp_error = CompareConst.SPACE + self.ulp_error_proportion = CompareConst.SPACE def to_column_value(self, is_pass, message): return [self.bench_type, self.npu_type, self.shape, self.cosine_sim, self.max_abs_err, self.rel_err_hundredth, self.rel_err_thousandth, self.rel_err_ten_thousandth, self.error_rate, self.EB, self.RMSE, self.small_value_err_ratio, self.Max_rel_error, 
self.Mean_rel_error, self.inf_nan_error_ratio, - self.rel_err_ratio, self.abs_err_ratio, is_pass, message] + self.rel_err_ratio, self.abs_err_ratio, self.max_ulp_error, self.mean_ulp_error, + self.ulp_error_proportion, is_pass, message] class ApiPrecisionOutputColumn: @@ -49,6 +53,12 @@ def __init__(self): self.abs_err_ratio_status = CompareConst.SPACE self.error_rate = CompareConst.SPACE self.error_rate_status = CompareConst.SPACE + self.mean_ulp_err = CompareConst.SPACE + self.ulp_err_proportion = CompareConst.SPACE + self.ulp_err_proportion_ratio = CompareConst.SPACE + self.ulp_err_status = CompareConst.SPACE + self.rel_err_thousandth = CompareConst.SPACE + self.rel_err_thousandth_status = CompareConst.SPACE self.compare_result = CompareConst.SPACE self.compare_algorithm = CompareConst.SPACE self.compare_message = CompareConst.SPACE @@ -58,6 +68,6 @@ def to_column_value(self): self.rmse_status, self.max_rel_err_ratio, self.max_rel_err_status, self.mean_rel_err_ratio, self.mean_rel_err_status, self.eb_ratio, self.eb_status, self.inf_nan_error_ratio, self.inf_nan_error_ratio_status, self.rel_err_ratio, self.rel_err_ratio_status, self.abs_err_ratio, - self.abs_err_ratio_status, self.error_rate, self.error_rate_status, self.compare_result, - self.compare_algorithm, self.compare_message] - \ No newline at end of file + self.abs_err_ratio_status, self.error_rate, self.error_rate_status, self.mean_ulp_err, + self.ulp_err_proportion, self.ulp_err_proportion_ratio, self.ulp_err_status, self.rel_err_thousandth, + self.rel_err_thousandth_status, self.compare_result, self.compare_algorithm, self.compare_message] diff --git a/Acc/compare/compare_utils.py b/Acc/compare/compare_utils.py index bbef032..fc92919 100644 --- a/Acc/compare/compare_utils.py +++ b/Acc/compare/compare_utils.py @@ -14,6 +14,7 @@ API_PRECISION_COMPARE_DETAILS_FILE_NAME = "api_precision_compare_details_" + current_time + ".csv" BENCHMARK_COMPARE_SUPPORT_LIST = ['paddle.float16', 'paddle.bfloat16', 
'paddle.float32'] API_PRECISION_COMPARE_UNSUPPORT_LIST = ['paddle.float64', 'paddle.complex64', 'paddle.complex128'] +ULP_COMPARE_SUPPORT_LIST = ['paddle.float16', 'paddle.bfloat16', 'paddle.float32'] BINARY_COMPARE_UNSUPPORT_LIST = BENCHMARK_COMPARE_SUPPORT_LIST + API_PRECISION_COMPARE_UNSUPPORT_LIST @@ -23,6 +24,8 @@ Apis = yaml.safe_load(f) AbsoluteStandardApi = Apis.get('AbsoluteThreshStandard') BinaryStandardApi = Apis.get('BinaryCompareStandard') + ULPStandardApi = Apis.get('ULPStandard') + ThousandthStandardApi = Apis.get('ThousandthStandard') threshold_yaml_path = os.path.join(cur_path, "api_precision_threshold.yaml") @@ -46,6 +49,9 @@ "inf/nan错误率", "相对误差错误率", "绝对误差错误率", + "ULP误差最大值", + "ULP误差平均值", + "ULP误差大于阈值占比", "Status", "Message" ]] @@ -79,6 +85,34 @@ } +ULP_PARAMETERS = { + paddle.float16 : { + 'min_eb' : [ + -14 + ], + 'exponent_num' : [ + 10 + ] + }, + paddle.bfloat16 : { + 'min_eb' : [ + -126 + ], + 'exponent_num' : [ + 7 + ] + }, + paddle.float32 : { + 'min_eb' : [ + -126 + ], + 'exponent_num' : [ + 23 + ] + } +} + + class CompareConst: NAN = np.nan NA = "N/A" @@ -120,6 +154,12 @@ class ApiPrecisionCompareColumn: REL_ERR_RATIO_STATUS = '相对误差判定结果' ABS_ERR_RATIO = '绝对误差错误率' ABS_ERR_RATIO_STATUS = '绝对误差判定结果' + MEAN_ULP_ERR = 'ULP误差平均值' + ULP_ERR_PROPORTION = 'ULP误差大于阈值占比' + ULP_ERR_PROPORTION_RATIO = 'ULP误差大于阈值占比比值' + ULP_ERR_STATUS = 'ULP误差判定结果' + REL_ERR_THOUSANDTH = '双千指标' + REL_ERR_THOUSANDTH_STATUS = '双千指标判定结果' FINAL_RESULT = '比对结果' ALGORITHM = '比对算法' FORWWARD_STATUS = 'Forward Test Success' @@ -132,7 +172,9 @@ def to_required_columns(): ApiPrecisionCompareColumn.SMALL_VALUE_ERROR_RATE, ApiPrecisionCompareColumn.RMSE, ApiPrecisionCompareColumn.MAX_REL_ERR, ApiPrecisionCompareColumn.MEAN_REL_ERR, ApiPrecisionCompareColumn.EB, ApiPrecisionCompareColumn.ERROR_RATE, ApiPrecisionCompareColumn.INF_NAN_ERROR_RATIO, - ApiPrecisionCompareColumn.REL_ERR_RATIO, ApiPrecisionCompareColumn.ABS_ERR_RATIO] + ApiPrecisionCompareColumn.REL_ERR_RATIO, 
ApiPrecisionCompareColumn.ABS_ERR_RATIO, + ApiPrecisionCompareColumn.MEAN_ULP_ERR, ApiPrecisionCompareColumn.ULP_ERR_PROPORTION, + ApiPrecisionCompareColumn.REL_ERR_THOUSANDTH] @staticmethod def get_detail_csv_title(): @@ -146,6 +188,9 @@ def get_detail_csv_title(): ApiPrecisionCompareColumn.REL_ERR_RATIO, ApiPrecisionCompareColumn.REL_ERR_RATIO_STATUS, ApiPrecisionCompareColumn.ABS_ERR_RATIO, ApiPrecisionCompareColumn.ABS_ERR_RATIO_STATUS, ApiPrecisionCompareColumn.ERROR_RATE, ApiPrecisionCompareColumn.ERROR_RATE_STATUS, + ApiPrecisionCompareColumn.MEAN_ULP_ERR, ApiPrecisionCompareColumn.ULP_ERR_PROPORTION, + ApiPrecisionCompareColumn.ULP_ERR_PROPORTION_RATIO, ApiPrecisionCompareColumn.ULP_ERR_STATUS, + ApiPrecisionCompareColumn.REL_ERR_THOUSANDTH, ApiPrecisionCompareColumn.REL_ERR_THOUSANDTH_STATUS, ApiPrecisionCompareColumn.FINAL_RESULT, ApiPrecisionCompareColumn.ALGORITHM, ApiPrecisionCompareColumn.MESSAGE] @staticmethod @@ -189,4 +234,6 @@ def convert_str_to_float(input_data): except ValueError as e: msg = 'ERROR: Input data cannot be converted to float' raise CompareException(CompareException.INVALID_DATA_ERROR, msg) from e - \ No newline at end of file + +def is_inf_or_nan(x): + return math.isnan(x) or math.isinf(x) From a878c059fcaa59928df8f6ae31a1903625866f68 Mon Sep 17 00:00:00 2001 From: zhangjian <1032674385@qq.com> Date: Thu, 13 Jun 2024 14:52:18 +0800 Subject: [PATCH 3/5] ignore empty operator using config/white_list.yaml --- Acc/configs/white_list.yaml | 1 + Acc/run_ut/run_dualback_ut.py | 4 ++-- Acc/run_ut/run_ut.py | 4 ++-- Acc/run_ut/run_ut_utils.py | 12 +++++++++++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Acc/configs/white_list.yaml b/Acc/configs/white_list.yaml index 7e7a8aa..2c1718b 100644 --- a/Acc/configs/white_list.yaml +++ b/Acc/configs/white_list.yaml @@ -1,5 +1,6 @@ # White List Sample format +white_list: ['empty'] functional: - conv2d diff --git a/Acc/run_ut/run_dualback_ut.py b/Acc/run_ut/run_dualback_ut.py index 
7c72326..45b8d7c 100644 --- a/Acc/run_ut/run_dualback_ut.py +++ b/Acc/run_ut/run_dualback_ut.py @@ -33,7 +33,7 @@ seed_all() current_time = time.strftime("%Y%m%d%H%M%S") -white_list = ['empty'] # 算子白名单 +from run_ut_utils import WhiteList # 算子白名单 tqdm_params = { "smoothing": 0, # 平滑进度条的预计剩余时间,取值范围0到1 @@ -117,7 +117,7 @@ def run_ut_save(forward_content, real_data_path, out_path, backend): Warning_list = [] try: [_, api_name, _] = api_full_name.split(Const.DELIMITER) - if api_name in set(white_list): + if api_name in set(WhiteList): continue print(api_full_name) run_paddle_api_save( diff --git a/Acc/run_ut/run_ut.py b/Acc/run_ut/run_ut.py index 5a040c8..7bbbf8f 100644 --- a/Acc/run_ut/run_ut.py +++ b/Acc/run_ut/run_ut.py @@ -25,7 +25,7 @@ DETAILS_FILE_NAME = f"accuracy_checking_details_" + current_time + ".csv" RunUTConfig = namedtuple('RunUTConfig', ['forward_content', 'backward_content', 'result_csv_path', 'details_csv_path', 'save_error_data', 'is_continue_run_ut', 'real_data_path']) -white_list = ['empty'] # 算子白名单 +from run_ut_utils import WhiteList # 算子白名单 tqdm_params = { 'smoothing': 0, # 平滑进度条的预计剩余时间,取值范围0到1 @@ -144,7 +144,7 @@ def run_ut(config): for i, (api_full_name, api_info_dict) in enumerate(tqdm(config.forward_content.items(), **tqdm_params)): try: [_, api_name, _] = api_full_name.split(Const.DELIMITER) - if api_name in set(white_list): + if api_name in set(WhiteList): continue print(api_full_name) data_info = run_paddle_api(api_full_name, config.real_data_path, api_info_dict) diff --git a/Acc/run_ut/run_ut_utils.py b/Acc/run_ut/run_ut_utils.py index d78642f..c6b1921 100644 --- a/Acc/run_ut/run_ut_utils.py +++ b/Acc/run_ut/run_ut_utils.py @@ -4,4 +4,14 @@ class Backward_Message: MULTIPLE_BACKWARD_MESSAGE = "Multiple backward is not supported." UNSUPPORT_BACKWARD_MESSAGE = "function with out=... arguments don't support automatic differentiation, skip backward." - NO_BACKWARD_RESULT_MESSAGE = "function backward result is None, skip backward." 
\ No newline at end of file + NO_BACKWARD_RESULT_MESSAGE = "function backward result is None, skip backward." + +import os +import yaml +from file_check_util import FileOpen +cur_path = os.path.dirname(os.path.realpath(__file__)) +yaml_path = os.path.join(os.path.dirname(cur_path),"configs","white_list.yaml") +WhiteList = [] +with FileOpen(yaml_path, 'r') as f: + Ops = yaml.safe_load(f) + WhiteList = Ops.get('white_list') From 6f143f902713e4a5683c4461852d792496467a6f Mon Sep 17 00:00:00 2001 From: zhangjian <1032674385@qq.com> Date: Thu, 13 Jun 2024 15:14:01 +0800 Subject: [PATCH 4/5] fix bug of whitelist --- Acc/run_ut/run_dualback_ut.py | 2 +- Acc/run_ut/run_ut.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Acc/run_ut/run_dualback_ut.py b/Acc/run_ut/run_dualback_ut.py index 45b8d7c..84f6d38 100644 --- a/Acc/run_ut/run_dualback_ut.py +++ b/Acc/run_ut/run_dualback_ut.py @@ -116,7 +116,7 @@ def run_ut_save(forward_content, real_data_path, out_path, backend): ): Warning_list = [] try: - [_, api_name, _] = api_full_name.split(Const.DELIMITER) + [_, api_name, _] = api_full_name.split('*') if api_name in set(WhiteList): continue print(api_full_name) diff --git a/Acc/run_ut/run_ut.py b/Acc/run_ut/run_ut.py index 7bbbf8f..dc5233c 100644 --- a/Acc/run_ut/run_ut.py +++ b/Acc/run_ut/run_ut.py @@ -143,7 +143,7 @@ def run_ut(config): api_name_set = {row[0] for row in csv_reader} for i, (api_full_name, api_info_dict) in enumerate(tqdm(config.forward_content.items(), **tqdm_params)): try: - [_, api_name, _] = api_full_name.split(Const.DELIMITER) + [_, api_name, _] = api_full_name.split('*') if api_name in set(WhiteList): continue print(api_full_name) From dec2498f15dcf345aaee6f04946533bcbdf4fee1 Mon Sep 17 00:00:00 2001 From: zhangjian <1032674385@qq.com> Date: Thu, 13 Jun 2024 16:47:09 +0800 Subject: [PATCH 5/5] fix bug after testing --- Acc/api_precision_compare.py | 3 ++- Acc/compare/compare_utils.py | 2 +- 2 files changed, 3 insertions(+), 2 
deletions(-) diff --git a/Acc/api_precision_compare.py b/Acc/api_precision_compare.py index 4521920..3dbe5fd 100644 --- a/Acc/api_precision_compare.py +++ b/Acc/api_precision_compare.py @@ -315,7 +315,7 @@ def analyse_csv(npu_data, gpu_data, config): compare_column = ApiPrecisionOutputColumn() full_api_name_with_direction_status = row_npu[ApiPrecisionCompareColumn.API_NAME] row_gpu = gpu_data[gpu_data[ApiPrecisionCompareColumn.API_NAME] == full_api_name_with_direction_status] - _, api_name, _, direction_status, _, _ = full_api_name_with_direction_status.split(".") + full_api_name, direction_status, _, _ = full_api_name_with_direction_status.split(".") if row_gpu.empty: print_warn_log(f'This API : {full_api_name_with_direction_status} does not exist in the GPU data.') continue @@ -324,6 +324,7 @@ def analyse_csv(npu_data, gpu_data, config): raise CompareException(CompareException.INVALID_DATA_ERROR, msg) row_gpu = row_gpu.iloc[0] new_status = CompareConst.SPACE + _, api_name, _ = full_api_name.split("*") #当前API的输出为空(例如反向过程中requires_grad=False),跳过比对 if row_npu[ApiPrecisionCompareColumn.DEVICE_DTYPE].isspace(): compare_column.api_name = full_api_name_with_direction_status diff --git a/Acc/compare/compare_utils.py b/Acc/compare/compare_utils.py index fc92919..c3777dd 100644 --- a/Acc/compare/compare_utils.py +++ b/Acc/compare/compare_utils.py @@ -1,7 +1,7 @@ import time import os import numpy as np - +import math import paddle import yaml