// Fragment of a trace writer. Only some statements are visible in this excerpt
// (the embedded line numbers jump), so the comments describe the shown lines only.
// Opens a JSON object, sets "displayTimeUnit" to "us" and starts the "traceEvents" array.
478 os <<
"{\n \"displayTimeUnit\":\"us\",\n \"traceEvents\":[";
// Lookup miss in `data`; the lookup itself and this branch's body are not visible here.
489 if (it ==
data.end()) {
// Scan the per-parent entries of the found key for the one recorded under parent_key.
493 for (
const auto& [p, entry] : it->second) {
494 if (p == parent_key) {
// Guard: no matching entry, or the entry has a zero time_max (branch body not visible).
499 if (self ==
nullptr || self->
time_max == 0) {
// Arguments of a call whose head is not visible: an empty parent_key is replaced by "_root".
505 parent_key.empty() ?
OperationKey{
"_root" } : parent_key,
// Start and duration are divided by 1000.0 -- presumably ns -> us to match displayTimeUnit; confirm units.
506 static_cast<double>(ts_start_ns) / 1000.0,
507 static_cast<double>(self->
time_max) / 1000.0,
// Collect children: every key in `data` that has an entry under parent `key` with non-zero time_max.
513 for (
const auto& [child_key, pmap] :
data) {
514 auto cit = pmap.find(
key);
515 if (cit != pmap.end() && cit->second.time_max > 0) {
516 children.emplace_back(child_key, cit->second.time_max);
// Order children by their recorded time_max, largest first.
520 std::sort(children.begin(), children.end(), [](
const auto&
a,
const auto&
b) { return a.second > b.second; });
// Children are placed back to back: each child starts at the parent's start plus the
// summed durations of the children emitted before it.
522 uint64_t child_offset = 0;
523 for (
const auto& [child_key, child_dur] : children) {
524 emit_tree(child_key,
key, ts_start_ns + child_offset);
525 child_offset += child_dur;
// Root collection for the trace output (fragment; loop bodies are only partly visible).
// A root is any key in `data` that has an entry under the empty parent "" with non-zero time_max.
531 for (
const auto& [
key, pmap] :
data) {
532 auto pit = pmap.find(
"");
533 if (pit != pmap.end() && pit->second.time_max > 0) {
534 roots.emplace_back(
key, pit->second.time_max);
// Order roots by time_max, largest first.
537 std::sort(roots.begin(), roots.end(), [](
const auto&
a,
const auto&
b) { return a.second > b.second; });
// root_offset accumulates each root's duration; the statement that consumes it (original
// line 541) is not visible here -- presumably the emit_tree call for the root.
539 uint64_t root_offset = 0;
540 for (
const auto& [root_key, root_dur] : roots) {
542 root_offset += root_dur;
// Opening of the textual report (fragment; the enclosing function header is not visible).
// With nothing aggregated, only a short notice is written (rest of the branch not visible).
552 if (aggregated.empty()) {
553 os <<
"No benchmark data collected\n";
// Banner: the bold title line is framed by two separators.
559 print_separator(os,
true);
560 os << Colors::BOLD <<
" Benchmark Results" << Colors::RESET <<
"\n";
561 print_separator(os,
true);
// Build keys_to_parents: for each key, the set of parent keys under which it has at least
// one recorded measurement (count > 0).
565 for (
auto& [
key, entry_map] : aggregated) {
566 for (
auto& [parent_key, entry] : entry_map) {
567 if (entry.count > 0) {
568 keys_to_parents[
key].insert(parent_key);
// print_entry: writes one row of the report for `entry` (fragment; the end of the lambda
// and some intermediate lines are not visible in this excerpt).
//   entry        - the aggregate to print; key, time_max, count, time_mean, num_threads are read
//   indent_level - nesting depth; controls indentation and the tree glyph
//   is_last      - selects the closing glyph instead of the continuing one
//   parent_time  - passed on to the percentage/aligned formatters
574 auto print_entry = [&](
const AggregateEntry& entry,
size_t indent_level,
bool is_last, uint64_t parent_time) {
// Two spaces of indentation per level; top-level rows get no tree glyph.
575 std::string indent(indent_level * 2,
' ');
576 std::string prefix = (indent_level == 0) ?
"" : (is_last ?
"└─ " :
"├─ ");
// Names longer than name_width are cut to name_width - 3 characters plus "...".
579 const size_t name_width = 80;
580 std::string display_name = std::string(entry.
key);
581 if (display_name.length() > name_width) {
582 display_name = display_name.substr(0, name_width - 3) +
"...";
// time_max divided by 1e6 -- presumably ns -> ms; confirm the unit of time_max.
585 double time_ms =
static_cast<double>(entry.
time_max) / 1000000.0;
586 auto colors = get_time_colors(time_ms);
// Name column: colour chosen by get_time_colors; YELLOW is added on top when the row is
// at least 1000 ms and the chosen name colour is BOLD.
589 os << indent << prefix << colors.name_color;
590 if (time_ms >= 1000.0 && colors.name_color == Colors::BOLD) {
591 os << Colors::YELLOW;
593 os << std::left << std::setw(static_cast<int>(name_width)) << display_name << Colors::RESET;
// Rows under 100 ms get a reduced section: level tag, percentage section, ten spaces of padding.
597 if (time_ms < 100.0) {
599 std::ostringstream minimal_oss;
600 minimal_oss << Colors::MAGENTA <<
"[" << indent_level <<
"] " << Colors::RESET;
601 minimal_oss << format_percentage_section(time_ms, static_cast<double>(parent_time), indent_level);
602 minimal_oss <<
" " <<
std::setw(10) <<
"";
// NOTE(review): setw(40) pads by string length; if Colors::* are escape sequences they
// count toward the width, so the visible column may be narrower than 40 -- confirm.
603 os <<
" " << colors.time_color <<
std::setw(40) << std::left << minimal_oss.str() << Colors::RESET;
// Other rows (original line 604 is not visible; presumably the else branch) use the
// full aligned section, which also receives the measurement count.
605 std::string aligned_section =
606 format_aligned_section(time_ms,
static_cast<double>(parent_time), entry.
count, indent_level);
607 os <<
" " << colors.time_color <<
std::setw(40) << std::left << aligned_section << Colors::RESET;
// Per-thread statistics; stddev_percentage is computed on a line not visible here.
609 double mean_ms = entry.
time_mean / 1000000.0;
611 os <<
" " << entry.
num_threads <<
" threads " << mean_ms <<
"ms average " << stddev_percentage
// print_hierarchy (fragment): prints the row for `key` and then recurses into its children.
// Only part of the parameter list and body is visible in this excerpt.
625 uint64_t parent_time,
// Unknown key: branch body not visible.
627 auto it = aggregated.find(
key);
628 if (it == aggregated.end()) {
// Pick the entry to show: at level 0 the one recorded under the empty parent, deeper
// levels the one recorded under current_parent.
634 for (
const auto& [parent_key, entry] : it->second) {
635 if ((indent_level == 0 && parent_key.empty()) || (indent_level > 0 && parent_key == current_parent)) {
636 entry_to_print = &entry;
// No matching entry: branch body not visible.
641 if (!entry_to_print) {
646 print_entry(*entry_to_print, indent_level, is_last, parent_time);
// Children are gathered only the first time a key is printed in detail: every key that
// has an entry under parent `key` with time_max >= 500000 (0.5 ms if time_max is in ns -- confirm).
650 if (!printed_in_detail.contains(
key)) {
651 for (
const auto& [child_key, parent_map] : aggregated) {
652 for (
const auto& [parent_key, entry] : parent_map) {
653 if (parent_key ==
key && entry.
time_max >= 500000) {
654 children.push_back(child_key);
659 printed_in_detail.insert(
key);
// Sort comparator (fragment): looks up a and b under parent `key`; the lines assigning
// time_a / time_b are not visible. Larger time sorts first.
666 if (
auto it = aggregated.find(
a); it != aggregated.end()) {
667 for (
const auto& [parent_key, entry] : it->second) {
668 if (parent_key ==
key) {
674 if (
auto it = aggregated.find(
b); it != aggregated.end()) {
675 for (
const auto& [parent_key, entry] : it->second) {
676 if (parent_key ==
key) {
682 return time_a > time_b;
// Sum time_max over the listed children, using the same threshold as the collection above.
686 uint64_t children_total_time = 0;
687 for (
const auto& child_key : children) {
688 if (
auto it = aggregated.find(child_key); it != aggregated.end()) {
689 for (
const auto& [parent_key, entry] : it->second) {
690 if (parent_key ==
key && entry.
time_max >= 500000) {
691 children_total_time += entry.
time_max;
// An "(other)" row is wanted when the listed children leave more than 5% of the
// parent's time_max unaccounted for.
696 uint64_t parent_total_time = entry_to_print->
time_max;
697 bool should_add_other =
false;
698 if (!children.empty() && parent_total_time > 0 && children_total_time < parent_total_time) {
699 uint64_t unaccounted = parent_total_time - children_total_time;
700 double percentage = (
static_cast<double>(unaccounted) /
static_cast<double>(parent_total_time)) * 100.0;
701 should_add_other = percentage > 5.0 && unaccounted > 0;
703 uint64_t other_time = should_add_other ? (parent_total_time - children_total_time) : 0;
// A key recorded under more than one parent gets a note that percentages may exceed 100%.
705 if (!children.empty() && keys_to_parents[
key].size() > 1) {
706 os << std::string(indent_level * 2,
' ') <<
" ├─ NOTE: Shared children. Can add up to > 100%.\n";
// NOTE(review): is_last_child depends only on should_add_other, but the "(other)" row
// below is also skipped for keys with more than one parent; in that case the final
// child is drawn with the continuing glyph and nothing follows it -- confirm intended.
710 for (
size_t i = 0; i < children.size(); ++i) {
711 bool is_last_child = (i == children.size() - 1) && !should_add_other;
// NOTE(review): the recursion passes entry_to_print->time as parent_time, while the
// totals above use time_max -- confirm which field is meant.
712 print_hierarchy(children[i], indent_level + 1, is_last_child, entry_to_print->
time,
key);
// Synthetic "(other)" row, only for keys with at most one parent.
// NOTE(review): print_entry reads time_max; only the assignment to `time` is visible
// here (original line 720 is missing) -- confirm time_max is also set.
716 if (should_add_other && keys_to_parents[
key].size() <= 1) {
718 other_entry.
key =
"(other)";
719 other_entry.
time = other_time;
721 other_entry.
count = 1;
723 print_entry(other_entry, indent_level + 1,
true, parent_total_time);
// Root selection for the textual report (fragment).
// A root is any key with an entry under the empty parent "" whose `time` is non-zero.
// NOTE(review): the filter reads `.time` while the sort below reads `.time_max` -- confirm
// both fields are intended.
729 for (
const auto& [
key, parent_map] : aggregated) {
730 auto empty_parent_it = parent_map.find(
"");
731 if (empty_parent_it != parent_map.end() && empty_parent_it->second.time > 0) {
732 roots.push_back(
key);
// Sort comparator (fragment): compares the time_max of each key's empty-parent entry,
// larger first. The initial values of time_a / time_b are on lines not visible here.
740 if (
auto it_a = aggregated.find(
a); it_a != aggregated.end()) {
741 if (
auto parent_it = it_a->second.find(
""); parent_it != it_a->second.end()) {
742 time_a = parent_it->second.time_max;
745 if (
auto it_b = aggregated.find(
b); it_b != aggregated.end()) {
746 if (
auto parent_it = it_b->second.find(
""); parent_it != it_b->second.end()) {
747 time_b = parent_it->second.time_max;
750 return time_a > time_b;
// Print each root at level 0 with parent_time 0 and an empty current parent; the final
// root is flagged as last.
754 for (
size_t i = 0; i < roots.size(); ++i) {
755 print_hierarchy(roots[i], 0, i == roots.size() - 1, 0,
"");
// Summary footer of the report (fragment; some branch bodies are not visible).
759 print_separator(os,
false);
// Count distinct keys in `aggregated`.
763 for (
const auto& [
key, _] : aggregated) {
764 unique_funcs.insert(
key);
766 size_t unique_functions_count = unique_funcs.size();
// Keys recorded under more than one parent; the branch body (presumably the increment
// of shared_count) is not visible here.
768 uint64_t shared_count = 0;
769 for (
const auto& [
key, parents] : keys_to_parents) {
770 if (parents.size() > 1) {
// Total time is the largest time_max among empty-parent entries (a maximum, not a sum).
775 uint64_t total_time = 0;
776 for (
const auto& [_, parent_map] : aggregated) {
777 if (
auto it = parent_map.find(
""); it != parent_map.end()) {
778 total_time =
std::max(total_time, it->second.time_max);
// Total measurements: the sum of `count` over every (key, parent) entry.
782 uint64_t total_calls = 0;
783 for (
const auto& [_, parent_map] : aggregated) {
784 for (
const auto& [__, entry] : parent_map) {
785 total_calls += entry.
count;
// Same divide-by-1e6 conversion as the per-row time (presumably ns -> ms; confirm).
789 double total_time_ms =
static_cast<double>(total_time) / 1000000.0;
// "Total:" line: function count, optional shared count, measurement count, then the time.
791 os <<
" " << Colors::BOLD <<
"Total: " << Colors::RESET << Colors::MAGENTA << unique_functions_count
792 <<
" functions" << Colors::RESET;
793 if (shared_count > 0) {
794 os <<
" (" << Colors::RED << shared_count <<
" shared" << Colors::RESET <<
")";
796 os <<
", " << Colors::GREEN << total_calls <<
" measurements" << Colors::RESET <<
", " << Colors::YELLOW;
// Totals of at least 1000 ms take a separate formatting branch (body not visible).
797 if (total_time_ms >= 1000.0) {
805 print_separator(os,
true);