From 3fc130c3c77e3fec2a620974435218a058f9ab7d Mon Sep 17 00:00:00 2001 From: Prathamesh Kulkarni Date: Tue, 5 May 2026 02:23:39 -0700 Subject: [PATCH] Add perf timestamps to top level functions. This patch is a counter part to GCC's patch for enabling time profile based reordering based on perf timestamps with AutoFDO: https://gcc.gnu.org/pipermail/gcc-patches/2025-October/696758.html The patch adds a new timestamp field to Symbol, and emits it to gcov for version >= 3. Currently, the patch only records timestamps from PMU mode (PerfDataSampleReader), I will post a follow up patch shortly for recording timestamps with SPE. Results: On an internal large workload with GCC's AutoFDO and time profile reordering (enabled by this patch), we see an improvement of ~32%. --- CMakeLists.txt | 19 +++++++++++++++++++ profile.cc | 11 +++++++++++ profile.h | 3 ++- profile_reader.cc | 6 ++++++ profile_writer.cc | 4 ++++ sample_reader.cc | 3 +++ sample_reader.h | 6 ++++++ symbol_map.cc | 9 +++++++-- symbol_map.h | 7 ++++++- timestamp_test.cc | 31 +++++++++++++++++++++++++++++++ 10 files changed, 95 insertions(+), 4 deletions(-) create mode 100644 timestamp_test.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index b137172..419ad66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,6 +181,25 @@ function (build_gcov) glog) add_test(NAME summary_calculation_test COMMAND summary_calculation_test) + add_executable(timestamp_test + timestamp_test.cc + instruction_map.cc + profile.cc + profile_creator.cc + sample_reader.cc + simple_spe_sample_reader.cc + symbol_map.cc) + target_link_libraries(timestamp_test + quipper_perf + gtest + gtest_main + absl::flags + absl::flags_parse + addr2line_lib + glog) + add_test(NAME timestamp_test COMMAND timestamp_test) + + add_custom_command(PRE_BUILD OUTPUT prepare_cmds COMMAND ln -s -f ${CMAKE_HOME_DIRECTORY}/testdata) diff --git a/profile.cc b/profile.cc index ac0ef84..c6f71cf 100644 --- a/profile.cc +++ b/profile.cc @@ -60,6 +60,15 @@ void Profile::AggregatePerFunctionProfile(bool check_lbr_entry) { } } + const AddressTimestampMap *address_timestamp_map = &sample_reader_->address_timestamp_map (); + for (const auto &[addr, timestamp] : *address_timestamp_map) { + uint64_t vaddr = symbol_map_->get_static_vaddr(addr); + ProfileMaps *maps = GetProfileMaps(vaddr); + if (maps != nullptr && maps->timestamp == 0) { + maps->timestamp = timestamp; + } + } + #if defined(HAVE_LLVM) std::unique_ptr Disassembler; if (check_lbr_entry) { @@ -149,6 +158,8 @@ void Profile::ProcessPerFunctionProfile(absl::string_view func_name, maps.end_addr); // LOG(INFO) << "Built instruction map for func: " << func_name; + symbol_map_->AddSymbolTimestamp (func_name, maps.timestamp); + AddressCountMap map; const AddressCountMap *map_ptr; if (absl::GetFlag(FLAGS_use_lbr)) { diff --git a/profile.h b/profile.h index e590fa8..0d2850a 100644 --- a/profile.h +++ b/profile.h @@ -52,10 +52,11 @@ class Profile { // Internal data structure that aggregates profile for each symbol. struct ProfileMaps { ProfileMaps(uint64_t start, uint64_t end) - : start_addr(start), end_addr(end) {} + : start_addr(start), end_addr(end), timestamp (0) {} uint64_t GetAggregatedCount() const; uint64_t start_addr; uint64_t end_addr; + uint64_t timestamp; AddressCountMap address_count_map; RangeCountMap range_count_map; BranchCountMap branch_count_map; diff --git a/profile_reader.cc b/profile_reader.cc index 3f198d2..a8880d5 100644 --- a/profile_reader.cc +++ b/profile_reader.cc @@ -39,8 +39,13 @@ void AutoFDOProfileReader::ReadFunctionProfile() { void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, bool update) { uint64_t head_count; + uint64_t timestamp = 0; + if (stack.size() == 0) { head_count = gcov_read_counter(); + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + timestamp = gcov_read_counter(); + } } else { head_count = 0; } @@ -53,6 +58,7 @@ void AutoFDOProfileReader::ReadSymbolProfile(const SourceStack &stack, uint32_t num_callsites = gcov_read_unsigned(); if (stack.size() == 0) { symbol_map_->AddSymbol(name); + symbol_map_->AddSymbolTimestamp(name, timestamp); const_cast(symbol_map_->GetSymbolByName(name))->info.file_name = file_name; if (!force_update_ && symbol_map_->GetSymbolByName(name)->total_count > 0) { diff --git a/profile_writer.cc b/profile_writer.cc index 0c5fe86..3b5323d 100644 --- a/profile_writer.cc +++ b/profile_writer.cc @@ -124,6 +124,9 @@ class SourceProfileWriter: public SymbolTraverser { virtual void VisitTopSymbol(const std::string &name, const Symbol *node) { gcov_write_counter(node->head_count); + if (absl::GetFlag(FLAGS_gcov_version) >= 3) { + gcov_write_counter(node->timestamp); + } unsigned NameIdx = GetStringIndex(Symbol::Name(name.c_str())); CHECK(NameIdx != 0 && "name index 0 should never be present as a top-level symbol!"); gcov_write_unsigned(NameIdx); @@ -448,6 +451,7 @@ class ProfileDumper : public SymbolTraverser { node->Dump(0); absl::PrintF("node->head_count: %u\n", node->head_count); printf("GetStringIndex(%s): %u\n", name.c_str(), GetStringIndex(name)); + absl::PrintF("node->timestamp: %lu\n", (unsigned long) node->timestamp); printf("\n"); } diff --git a/sample_reader.cc b/sample_reader.cc index f37be53..ac5d6ab 100644 --- a/sample_reader.cc +++ b/sample_reader.cc @@ -296,6 +296,9 @@ bool PerfDataSampleReader::Append(const std::string &profile_file) { } if (MatchBinary(event.dso_and_offset)) { address_count_map_[event.dso_and_offset.offset()]++; + uint64_t address = event.dso_and_offset.offset(); + uint64_t timestamp = event.event_ptr->timestamp(); + address_timestamp_map_.insert({address, timestamp}); } int start_index = 0; while (start_index < event.branch_stack.size() && diff --git a/sample_reader.h b/sample_reader.h index 09f057b..3426f08 100644 --- a/sample_reader.h +++ b/sample_reader.h @@ -31,6 +31,7 @@ typedef std::pair Range; typedef std::map RangeCountMap; typedef std::pair Branch; typedef std::map BranchCountMap; +typedef std::map AddressTimestampMap; // Reads in the profile data, and represent it in address_count_map_. class SampleReader { @@ -52,6 +53,10 @@ class SampleReader { return branch_count_map_; } + const AddressTimestampMap &address_timestamp_map() const { + return address_timestamp_map_; + } + std::set GetSampledAddresses() const; // Returns the sample count for a given instruction. @@ -77,6 +82,7 @@ class SampleReader { AddressCountMap address_count_map_; RangeCountMap range_count_map_; BranchCountMap branch_count_map_; + AddressTimestampMap address_timestamp_map_; bool is_kernel_ = false; }; diff --git a/symbol_map.cc b/symbol_map.cc index 6a87c38..2a8d949 100644 --- a/symbol_map.cc +++ b/symbol_map.cc @@ -513,6 +513,11 @@ void SymbolMap::AddSymbolEntryCount(absl::string_view symbol_name, symbol->total_count += total_count; } +void SymbolMap::AddSymbolTimestamp(absl::string_view symbol_name, uint64_t timestamp) { + Symbol *symbol = map_.find(symbol_name)->second; + symbol->timestamp = timestamp; +} + Symbol *SymbolMap::TraverseInlineStack(absl::string_view symbol_name, const SourceStack &src, uint64_t count, DataSource data_source) { @@ -656,8 +661,8 @@ void Symbol::DumpBody(int indent, bool for_analysis) const { void Symbol::Dump(int indent) const { const std::string printed_name = getPrintName(info.func_name); if (indent == 0) { - absl::PrintF("%s:%s total:%u head:%u\n", printed_name, info.file_name, - total_count, head_count); + absl::PrintF("%s:%s total:%u head:%u timestamp: %lu\n", printed_name, info.file_name, + total_count, head_count, (unsigned long) timestamp); } else { absl::PrintF("%s:%s total:%u\n", printed_name, info.file_name, total_count); } diff --git a/symbol_map.h b/symbol_map.h index b9c56df..6613f9e 100644 --- a/symbol_map.h +++ b/symbol_map.h @@ -153,7 +153,8 @@ class Symbol { total_count_incl(0), head_count(0), callsites(0), - pos_counts() { + pos_counts(), + timestamp(0) { } // This constructor is used to create aliased symbol. @@ -243,6 +244,8 @@ class Symbol { CallsiteMap callsites; // Map from source location to count and instruction number. PositionCountMap pos_counts; + // timestamp of first executed instruction + uint64_t timestamp; }; // Vector of unique pointers to symbols. @@ -421,6 +424,8 @@ class SymbolMap { void AddSymbolEntryCount(absl::string_view symbol, uint64_t head_count, uint64_t total_count = 0); + void AddSymbolTimestamp(absl::string_view symbol, uint64_t timestamp); + // DataSource represents what kind of data is used to generate afdo profile. // PERFDATA: convert perf.data to afdo profile. // AFDOPROTO: convert afdo proto generated by GWP autofdo pipeline to diff --git a/timestamp_test.cc b/timestamp_test.cc new file mode 100644 index 0000000..b740671 --- /dev/null +++ b/timestamp_test.cc @@ -0,0 +1,31 @@ +#include +#include "profile_creator.h" +#include "symbol_map.h" +#include "gmock/gmock.h" + +using namespace devtools_crosstool_autofdo; + +TEST(TimestampTest, AssignTimestampToSymbol) { + std::string binary = ::testing::SrcDir() + "/testdata/llvm_function_samples.binary"; + std::string profile_name = ::testing::SrcDir() + "/testdata/llvm_function_samples_perf.data"; + std::string profiler = "perf"; + + ProfileCreator creator(binary); + SymbolMap symbol_map(binary); + + EXPECT_TRUE(creator.ReadSample(profile_name, profiler)); + symbol_map.ReadLoadableExecSegmentInfo(creator.IsKernelSample()); + EXPECT_TRUE(creator.ComputeProfile(&symbol_map, false)); + + // The timestamp associated with function is the timestamp of first execution + // of the first sampled instruction in the function. + + const Symbol *main_sym = symbol_map.GetSymbolByName("main"); + EXPECT_EQ(main_sym->timestamp, 801840661247321); + + const Symbol *Z3bari = symbol_map.GetSymbolByName("_Z3bari"); + EXPECT_EQ(Z3bari->timestamp, 801840748463085); + + const Symbol *Z3fooi = symbol_map.GetSymbolByName("_Z3fooi"); + EXPECT_EQ(Z3fooi->timestamp, 801841064917429); +}