From b63d2470fec0fe1ca568b894fc8ed551af15fdbc Mon Sep 17 00:00:00 2001 From: Xujinming Date: Mon, 22 Jul 2024 17:46:36 +0800 Subject: [PATCH 1/2] Fix max/min hijack --- paddleapex/api_tracer/api_info.py | 8 +++++--- paddleapex/api_tracer/hook_op.py | 2 ++ paddleapex/api_tracer/wrap_op/OPTemplate.py | 10 +++------- paddleapex/api_tracer/wrap_op/get_target_op.py | 2 -- paddleapex/api_tracer/wrap_op/hijack_tool.py | 10 +++++----- 5 files changed, 15 insertions(+), 17 deletions(-) create mode 100644 paddleapex/api_tracer/hook_op.py diff --git a/paddleapex/api_tracer/api_info.py b/paddleapex/api_tracer/api_info.py index df293fd..4bf4811 100644 --- a/paddleapex/api_tracer/api_info.py +++ b/paddleapex/api_tracer/api_info.py @@ -16,7 +16,7 @@ import numpy as np from paddleapex.api_tracer.Dump import dump_util from paddleapex.api_tracer.config import cfg - +from paddleapex.api_tracer.hook_op import HookOp Paddle_Type_Map = { "FP64": "paddle.float64", "FP32": "paddle.float32", @@ -134,8 +134,10 @@ def effi_analyze_tensor(self, arg): single_arg.update({"shape": arg.shape}) try: with paddle.no_grad(): - max_ = paddle.max(arg).item() - min_ = paddle.min(arg).item() + fun_max = getattr(HookOp, "wrap_paddle.max") + fun_min = getattr(HookOp, "wrap_paddle.min") + max_ = fun_max(arg).item() + min_ = fun_min(arg).item() except: max_ = 1 min_ = 0 diff --git a/paddleapex/api_tracer/hook_op.py b/paddleapex/api_tracer/hook_op.py new file mode 100644 index 0000000..57e694f --- /dev/null +++ b/paddleapex/api_tracer/hook_op.py @@ -0,0 +1,2 @@ +class HookOp: + pass \ No newline at end of file diff --git a/paddleapex/api_tracer/wrap_op/OPTemplate.py b/paddleapex/api_tracer/wrap_op/OPTemplate.py index 810c414..40aeaaa 100644 --- a/paddleapex/api_tracer/wrap_op/OPTemplate.py +++ b/paddleapex/api_tracer/wrap_op/OPTemplate.py @@ -13,13 +13,9 @@ # limitations under the License. import paddle.distributed as dist import paddle -from .. import config -from ..api_info import API - - -class HookOp: - pass - +from paddleapex.api_tracer import config +from paddleapex.api_tracer.api_info import API +from paddleapex.api_tracer.hook_op import HookOp cfg = config.cfg diff --git a/paddleapex/api_tracer/wrap_op/get_target_op.py b/paddleapex/api_tracer/wrap_op/get_target_op.py index 9b5c2f0..66a8054 100644 --- a/paddleapex/api_tracer/wrap_op/get_target_op.py +++ b/paddleapex/api_tracer/wrap_op/get_target_op.py @@ -44,7 +44,5 @@ def check_api_stack(self): def get_target_ops(self): self.api_to_catch = set(self.target_op) - set(self.ignored_op) - if cfg.profile_mode: - self.api_to_catch -= set(["paddle.max", "paddle.min"]) self.check_api_stack() return self.api_to_catch diff --git a/paddleapex/api_tracer/wrap_op/hijack_tool.py b/paddleapex/api_tracer/wrap_op/hijack_tool.py index 8dad0f2..4a346dd 100644 --- a/paddleapex/api_tracer/wrap_op/hijack_tool.py +++ b/paddleapex/api_tracer/wrap_op/hijack_tool.py @@ -13,11 +13,11 @@ # limitations under the License. -from .. import config -from ...utils import try_import -from .get_target_op import GetTargetOP -from .OPTemplate import OPTemplate, HookOp - +from paddleapex.api_tracer import config +from paddleapex.utils import try_import +from paddleapex.api_tracer.wrap_op.get_target_op import GetTargetOP +from paddleapex.api_tracer.wrap_op.OPTemplate import OPTemplate +from paddleapex.api_tracer.hook_op import HookOp cfg = config.cfg From 8c9ee8a119eff849d89b6250165ab5fa32b0f4d8 Mon Sep 17 00:00:00 2001 From: Xujinming Date: Mon, 22 Jul 2024 19:48:13 +0800 Subject: [PATCH 2/2] update memory view --- README.md | 2 +- README_CN.md | 2 +- paddleapex/apex/mem_cmp.py | 22 ++++++++--------- paddleapex/apex/run_paddle.py | 5 ++-- paddleapex/apex/summary_generator.py | 35 +++++++++++++++++++++++++++- paddleapex/test/test.sh | 8 +++---- 6 files changed, 52 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index a7b8b37..d5185c3 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ We provide a flow chart for Multi-end precision comparision. 3. Generate performance/accuracy summary: ```shell cd paddleapex/apex - python summary_generator.py -acc [acc_result] -prof [prof_detail] + python summary_generator.py -acc [acc_result] -prof [prof_detail] -mem [mem_detail] ``` diff --git a/README_CN.md b/README_CN.md index cf5f177..5a88cbb 100644 --- a/README_CN.md +++ b/README_CN.md @@ -139,7 +139,7 @@ python mem_cmp.py --benchmark [gpu_repo] --device [npu_repo] -o [result_path] 3. 生成性能\精度总结报告: ```shell cd paddleapex/apex - python summary_generator.py -acc [acc_result] -prof [prof_detail] + python summary_generator.py -acc [acc_result] -prof [prof_detail] -mem [mem_detail] ``` #### 直接比对的标准: diff --git a/paddleapex/apex/mem_cmp.py b/paddleapex/apex/mem_cmp.py index c163b63..932696d 100644 --- a/paddleapex/apex/mem_cmp.py +++ b/paddleapex/apex/mem_cmp.py @@ -84,15 +84,14 @@ def compare_command(args): def analyze_log(raw_data): res_dict = {} - pattern = r"^(.*?)\s*:\t(.*?)\n$" + for item in raw_data: - match = re.match(pattern, item) - if match: - api_name = match.group(1) - data = match.group(2) - res_dict[api_name] = data - else: - print("The format of log is not correct.") + single_op_dict = {} + item = item.replace('\n', '') + data_list = item.split("\t") + single_op_dict['dtype'] = data_list[1] + single_op_dict['memory'] = data_list[2] + res_dict[data_list[0]] = single_op_dict return res_dict @@ -120,11 +119,12 @@ def compare_device_bench( temp_dict = {} if key in mem_dict1.keys(): temp_dict["API Name"] = key - temp_dict["Bench Memory Usage (B)"] = mem_dict1[key] + temp_dict['dtype'] = mem_dict1[key]['dtype'] + temp_dict["Bench Memory Usage (B)"] = mem_dict1[key]['memory'] if key in mem_dict2.keys(): - temp_dict["Device Memory Usage (B)"] = mem_dict2[key] + temp_dict["Device Memory Usage (B)"] = mem_dict2[key]['memory'] if key in mem_dict1.keys(): - temp_dict["Memory Difference"] = abs(float(mem_dict1[key])-float(mem_dict2[key])) + temp_dict["Memory Difference"] = abs(float(mem_dict1[key]['memory'])-float(mem_dict2[key]['memory'])) else: temp_dict["Device Memory Usage (B)"] = "" temp_dict["Memory Difference"] = "" diff --git a/paddleapex/apex/run_paddle.py b/paddleapex/apex/run_paddle.py index 0b5f9d1..4bac7e6 100644 --- a/paddleapex/apex/run_paddle.py +++ b/paddleapex/apex/run_paddle.py @@ -472,9 +472,8 @@ def run_mem_case( os.mkdir(out_path) if not os.path.exists(out_path) else None F = open(log_path, "a") - dtype = "" if not enforce_dtype else f"*{enforce_dtype.name}" - op_name = api_call_name + dtype + ".forward" - F.write(f"{op_name}:\t{str(activation_cost)}\n") + dtype = "" if not enforce_dtype else f"{enforce_dtype.name}" + F.write(f"{api_call_name}\t{dtype}\t{str(activation_cost)}\n") F.close() return diff --git a/paddleapex/apex/summary_generator.py b/paddleapex/apex/summary_generator.py index 7162ceb..4af25a4 100644 --- a/paddleapex/apex/summary_generator.py +++ b/paddleapex/apex/summary_generator.py @@ -14,11 +14,19 @@ type=str, required=True, ) +parser.add_argument( + "-mem", + dest="mem_detail", + type=str, + required=True, +) args = parser.parse_args(sys.argv[1:]) acc_result_path = args.acc_result prof_detail_path = args.prof_detail +mem_detail_path = args.mem_detail acc_summary = csv.reader(open(acc_result_path, 'r')) prof_details = csv.reader(open(prof_detail_path, 'r')) +mem_details = csv.reader(open(mem_detail_path, 'r')) acc_dict = {} acc_summary = list(acc_summary) @@ -31,8 +39,19 @@ acc_dict[fwd_name] = acc_summary[i][1] acc_dict[bwd_name] = acc_summary[i][2] +mem_dict = {} +mem_details = list(mem_details) +for i in range(len(mem_details)): + if i == 0: + continue + else: + fwd_name = mem_details[i][0] + ".forward" + mem_dict[fwd_name] = [mem_details[i][1], mem_details[i][2]] + + + prof_details = list(prof_details) -csv_head = tuple(prof_details[0]).__add__(tuple(["Acc_status"])) +csv_head = tuple(prof_details[0])+(tuple(["Acc_status", "Bench_memory", "Device_memory"])) for item in prof_details[1:]: name = item[0] try: @@ -41,6 +60,20 @@ status = "N/A" item.append(status) +for item in prof_details[1:]: + name = item[0] + try: + bench_mem = mem_dict[name][0] + except: + bench_mem = "" + item.append(bench_mem) + + try: + device_mem = mem_dict[name][1] + except: + device_mem = "" + item.append(device_mem) + with open('prof_summary.csv', 'w') as csvfile: writer = csv.writer(csvfile) writer.writerow(csv_head) diff --git a/paddleapex/test/test.sh b/paddleapex/test/test.sh index 9877e2d..d2d5015 100644 --- a/paddleapex/test/test.sh +++ b/paddleapex/test/test.sh @@ -30,13 +30,11 @@ cp -r ../apex ./ python ./apex/run_paddle.py -json ./dump_info/rank0_step0/forward_rank0.json -out ./test_pipeline_force -backend $BACKEND \ -mode all -dtype FP32,BF16 -python ./apex/run_paddle.py -json ./dump_info/rank0_step0/forward_rank0.json -out ./test_pipeline_origin_data -backend $BACKEND \ - -mode acc - python ./apex/acc_direct_cmp.py -bench ./test_pipeline_force/FP32 -device ./test_pipeline_force/BF16 -o ./direct_cmp_test_BF16 python ./apex/prof_cmp.py -bench ./test_pipeline_force/BF16 -device ./test_pipeline_force/BF16 -o ./prof_cmp_test_BF16 -python ./apex/mem_cmp.py -bench ./test_pipeline_force/FP32 -device ./test_pipeline_force/FP32 -o ./prof_cmp_test_FP32 -python ./apex/acc_direct_cmp.py -bench ./test_pipeline_force/BF16 -device ./test_pipeline_force/BF16 -o ./direct_cmp_test_BF16 +python ./apex/prof_cmp.py -bench ./test_pipeline_force/FP32 -device ./test_pipeline_force/FP32 -o ./prof_cmp_test_FP32 + +python ./apex/mem_cmp.py -bench ./test_pipeline_force/FP32 -device ./test_pipeline_force/FP32 -o ./prof_cmp_test_FP32