-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[lld][BP] Print total size of startup symbols #145106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-lld-macho @llvm/pr-subscribers-lld-elf Author: Ellis Hoag (ellishg). Changes: A good proxy to estimate the number of page faults during startup is the total size of startup functions. Assuming profiles are up-to-date, we can measure this total size pretty easily. Note that if profile data is old, this number could be wrong. Full diff: https://github.com/llvm/llvm-project/pull/145106.diff (3 files affected):
diff --git a/lld/include/lld/Common/BPSectionOrdererBase.inc b/lld/include/lld/Common/BPSectionOrdererBase.inc
index bb2e55af1eb35..fbeca187aa76f 100644
--- a/lld/include/lld/Common/BPSectionOrdererBase.inc
+++ b/lld/include/lld/Common/BPSectionOrdererBase.inc
@@ -292,45 +292,54 @@ auto BPOrderer<D>::computeOrder(
bp.run(nodesForDataCompression);
}
- unsigned numStartupSections = 0;
- unsigned numCodeCompressionSections = 0;
- unsigned numDuplicateCodeSections = 0;
- unsigned numDataCompressionSections = 0;
- unsigned numDuplicateDataSections = 0;
+ unsigned numStartupSections = 0, startupSize = 0;
+ unsigned numCodeCompressionSections = 0, codeCompressionSize = 0;
+ unsigned numDuplicateCodeSections = 0, duplicateCodeSize = 0;
+ unsigned numDataCompressionSections = 0, dataCompressionSize = 0;
+ unsigned numDuplicateDataSections = 0, duplicateDataSize = 0;
SetVector<const Section *> orderedSections;
// Order startup functions,
for (auto &node : nodesForStartup) {
const auto *isec = sections[node.Id];
- if (orderedSections.insert(isec))
+ if (orderedSections.insert(isec)) {
+ startupSize += D::getSize(*isec);
++numStartupSections;
+ }
}
// then functions for compression,
for (auto &node : nodesForFunctionCompression) {
const auto *isec = sections[node.Id];
- if (orderedSections.insert(isec))
+ if (orderedSections.insert(isec)) {
+ codeCompressionSize += D::getSize(*isec);
++numCodeCompressionSections;
-
+ }
auto It = duplicateSectionIdxs.find(node.Id);
if (It == duplicateSectionIdxs.end())
continue;
for (auto dupSecIdx : It->getSecond()) {
const auto *dupIsec = sections[dupSecIdx];
- if (orderedSections.insert(dupIsec))
+ if (orderedSections.insert(dupIsec)) {
+ duplicateCodeSize += D::getSize(*isec);
++numDuplicateCodeSections;
+ }
}
}
// then data for compression.
for (auto &node : nodesForDataCompression) {
const auto *isec = sections[node.Id];
- if (orderedSections.insert(isec))
+ if (orderedSections.insert(isec)) {
+ dataCompressionSize += D::getSize(*isec);
++numDataCompressionSections;
+ }
auto It = duplicateSectionIdxs.find(node.Id);
if (It == duplicateSectionIdxs.end())
continue;
for (auto dupSecIdx : It->getSecond()) {
const auto *dupIsec = sections[dupSecIdx];
- if (orderedSections.insert(dupIsec))
+ if (orderedSections.insert(dupIsec)) {
+ duplicateDataSize += D::getSize(*isec);
++numDuplicateDataSections;
+ }
}
}
@@ -339,14 +348,21 @@ auto BPOrderer<D>::computeOrder(
numStartupSections + numCodeCompressionSections +
numDuplicateCodeSections + numDataCompressionSections +
numDuplicateDataSections;
- dbgs()
- << "Ordered " << numTotalOrderedSections
- << " sections using balanced partitioning:\n Functions for startup: "
- << numStartupSections
- << "\n Functions for compression: " << numCodeCompressionSections
- << "\n Duplicate functions: " << numDuplicateCodeSections
- << "\n Data for compression: " << numDataCompressionSections
- << "\n Duplicate data: " << numDuplicateDataSections << "\n";
+ unsigned totalOrderedSize = startupSize + codeCompressionSize +
+ duplicateCodeSize + dataCompressionSize +
+ duplicateDataSize;
+ dbgs() << "Ordered " << numTotalOrderedSections << " sections ("
+ << totalOrderedSize << " bytes) using balanced partitioning:\n";
+ dbgs() << " Functions for startup: " << numStartupSections << " ("
+ << startupSize << " bytes)\n";
+ dbgs() << " Functions for compression: " << numCodeCompressionSections
+ << " (" << codeCompressionSize << " bytes)\n";
+ dbgs() << " Duplicate functions: " << numDuplicateCodeSections << " ("
+ << duplicateCodeSize << " bytes)\n";
+ dbgs() << " Data for compression: " << numDataCompressionSections << " ("
+ << dataCompressionSize << " bytes)\n";
+ dbgs() << " Duplicate data: " << numDuplicateDataSections << " ("
+ << duplicateDataSize << " bytes)\n";
if (!profilePath.empty()) {
// Evaluate this function order for startup
diff --git a/lld/test/ELF/bp-section-orderer.s b/lld/test/ELF/bp-section-orderer.s
index 4889db63cd4d1..4a8374f7991fa 100644
--- a/lld/test/ELF/bp-section-orderer.s
+++ b/lld/test/ELF/bp-section-orderer.s
@@ -21,7 +21,7 @@
# RUN: llvm-profdata merge a.proftext -o a.profdata
# RUN: ld.lld a.o --irpgo-profile=a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --gc-sections 2>&1 | FileCheck %s --check-prefix=STARTUP-FUNC-ORDER
-# STARTUP-FUNC-ORDER: Ordered 3 sections using balanced partitioning
+# STARTUP-FUNC-ORDER: Ordered 3 sections ({{[0-9]+}} bytes) using balanced partitioning
# STARTUP-FUNC-ORDER: Total area under the page fault curve: 3.
# RUN: ld.lld -o out.s a.o --irpgo-profile=a.profdata --bp-startup-sort=function
@@ -49,10 +49,10 @@
# RUN: llvm-nm -jn out.cbs | tr '\n' , | FileCheck %s --check-prefix=CBOTH-STARTUP
# CBOTH-STARTUP: s5,s3,s4,s2,s1,A,B,C,F,E,D,merged1,merged2,_start,d4,d1,d3,d2,{{$}}
-# BP-COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
-# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning
-# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning
-# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning
+# BP-COMPRESSION-FUNC: Ordered 9 sections ({{[0-9]+}} bytes) using balanced partitioning
+# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections ({{[0-9]+}} bytes) using balanced partitioning
+# BP-COMPRESSION-DATA: Ordered 9 sections ({{[0-9]+}} bytes) using balanced partitioning
+# BP-COMPRESSION-BOTH: Ordered 18 sections ({{[0-9]+}} bytes) using balanced partitioning
#--- a.proftext
:ir
diff --git a/lld/test/MachO/bp-section-orderer.s b/lld/test/MachO/bp-section-orderer.s
index 93d0b85731de7..d933f2a27851c 100644
--- a/lld/test/MachO/bp-section-orderer.s
+++ b/lld/test/MachO/bp-section-orderer.s
@@ -9,8 +9,8 @@
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile %t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP-ICF
-# STARTUP: Ordered 5 sections using balanced partitioning
-# STARTUP-ICF: Ordered 4 sections using balanced partitioning
+# STARTUP: Ordered 5 sections ({{[0-9]+}} bytes) using balanced partitioning
+# STARTUP-ICF: Ordered 4 sections ({{[0-9]+}} bytes) using balanced partitioning
# Check that orderfiles take precedence over BP
# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o - %t/a.o -order_file %t/a.orderfile --irpgo-profile-sort=%t/a.profdata | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE
@@ -50,10 +50,10 @@
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH
-# COMPRESSION-FUNC: Ordered 9 sections using balanced partitioning
-# COMPRESSION-ICF-FUNC: Ordered 7 sections using balanced partitioning
-# COMPRESSION-DATA: Ordered 7 sections using balanced partitioning
-# COMPRESSION-BOTH: Ordered 16 sections using balanced partitioning
+# COMPRESSION-FUNC: Ordered 9 sections ({{[0-9]+}} bytes) using balanced partitioning
+# COMPRESSION-ICF-FUNC: Ordered 7 sections ({{[0-9]+}} bytes) using balanced partitioning
+# COMPRESSION-DATA: Ordered 7 sections ({{[0-9]+}} bytes) using balanced partitioning
+# COMPRESSION-BOTH: Ordered 16 sections ({{[0-9]+}} bytes) using balanced partitioning
#--- a.s
.text
|
CC @Colibrow |
LGTM. |
# BP-COMPRESSION-ICF-FUNC: Ordered 8 sections using balanced partitioning | ||
# BP-COMPRESSION-DATA: Ordered 9 sections using balanced partitioning | ||
# BP-COMPRESSION-BOTH: Ordered 18 sections using balanced partitioning | ||
# BP-COMPRESSION-FUNC: Ordered 9 sections ({{[0-9]+}} bytes) using balanced partitioning |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: write `[[#]] bytes` (FileCheck's numeric-match syntax) instead of `{{[0-9]+}} bytes`.
@@ -9,8 +9,8 @@ | |||
|
|||
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile %t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP | |||
# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP-ICF | |||
# STARTUP: Ordered 5 sections using balanced partitioning | |||
# STARTUP-ICF: Ordered 4 sections using balanced partitioning | |||
# STARTUP: Ordered 5 sections ({{[0-9]+}} bytes) using balanced partitioning |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: write `[[#]] bytes` (FileCheck's numeric-match syntax) instead of `{{[0-9]+}} bytes`.
A good proxy to estimate the number of page faults during startup is the total size of startup functions. Assuming profiles are up-to-date, we can measure this total size pretty easily. Note that if profile data is old, this number could be wrong.