Thanks. I will try it.

发件人: via groups.io <davidr=ghs.com@groups.io>
发送时间: 2024年12月6日 8:55
收件人: gaoliming <gaoliming@byosoft.com.cn>; devel@edk2.groups.io
主题: Re: [edk2-devel] 回复: [edk2-devel] FDF parser performance degrades rapidly on non-trivially sized inputs

Here is the code I was testing with. You can probably reduce the changes to just the one in _SkipWhiteSpace(), but I haven't tried that yet.

diff --git a/BaseTools/Source/Python/GenFds/FdfParser.py b/BaseTools/Source/Python/GenFds/FdfParser.py
index feb4c72779..8c57720116 100644
--- a/BaseTools/Source/Python/GenFds/FdfParser.py
+++ b/BaseTools/Source/Python/GenFds/FdfParser.py
@@ -12,6 +12,7 @@
#
from __future__ import print_function
from __future__ import absolute_import
+from io import StringIO
from re import compile, DOTALL
from string import hexdigits
from uuid import UUID
@@ -253,7 +254,7 @@ class FdfParser:
self.CurrentFdName = None
self.CurrentFvName = None
self._Token = ""
- self._SkippedChars = ""
+ self._SkippedChars = StringIO()
GlobalData.gFdfParser = self

# Used to section info
@@ -276,7 +277,7 @@ class FdfParser:
def _SkipWhiteSpace(self):
while not self._EndOfFile():
if self._CurrentChar() in {TAB_PRINTCHAR_NUL, T_CHAR_CR, TAB_LINE_BREAK, TAB_SPACE_SPLIT, T_CHAR_TAB}:
- self._SkippedChars += str(self._CurrentChar())
+ self._SkippedChars.write(str(self._CurrentChar()))
self._GetOneChar()
else:
return
@@ -696,7 +697,7 @@ class FdfParser:
Header = self._Token
if not self._Token.endswith(TAB_SECTION_END):
self._SkipToToken(TAB_SECTION_END)
- Header += self._SkippedChars
+ Header += self._SkippedChars.getvalue()
if Header.find('$(') != -1:
raise Warning("macro cannot be used in section header", self.FileName, self.CurrentLineNumber)
self._SectionHeaderParser(Header)
@@ -1226,7 +1227,7 @@ class FdfParser:
raise Warning(QuoteToUse, self.FileName, self.CurrentLineNumber)
if currentLineNumber != self.CurrentLineNumber:
raise Warning(QuoteToUse, self.FileName, self.CurrentLineNumber)
- self._Token = self._SkippedChars.rstrip(QuoteToUse)
+ self._Token = self._SkippedChars.getvalue().rstrip(QuoteToUse)
return True

## _SkipToToken() method
@@ -1243,7 +1244,7 @@ class FdfParser:
def _SkipToToken(self, String, IgnoreCase = False):
StartPos = self.GetFileBufferPos()

- self._SkippedChars = ""
+ self._SkippedChars = StringIO()
while not self._EndOfFile():
index = -1
if IgnoreCase:
@@ -1252,13 +1253,13 @@ class FdfParser:
index = self._CurrentLine()[self.CurrentOffsetWithinLine: ].find(String)
if index == 0:
self.CurrentOffsetWithinLine += len(String)
- self._SkippedChars += String
+ self._SkippedChars.write(String)
return True
- self._SkippedChars += str(self._CurrentChar())
+ self._SkippedChars.write(str(self._CurrentChar()))
self._GetOneChar()

self.SetFileBufferPos(StartPos)
- self._SkippedChars = ""
+ self._SkippedChars = StringIO()
return False

## GetFileBufferPos() method
@@ -2890,7 +2891,7 @@ class FdfParser:
if not self._SkipToToken(T_CHAR_BRACE_R):
raise Warning.Expected("Depex expression ending '}'", self.FileName, self.CurrentLineNumber)

- DepexSectionObj.Expression = self._SkippedChars.rstrip(T_CHAR_BRACE_R)
+ DepexSectionObj.Expression = self._SkippedChars.getvalue().rstrip(T_CHAR_BRACE_R)
Obj.SectionList.append(DepexSectionObj)

elif self._IsKeyword("SUBTYPE_GUID"):
@@ -3525,7 +3526,7 @@ class FdfParser:
if not self._SkipToToken(TAB_SPLIT):
raise Warning.Expected("'.'", self.FileName, self.CurrentLineNumber)

- Arch = self._SkippedChars.rstrip(TAB_SPLIT)
+ Arch = self._SkippedChars.getvalue().rstrip(TAB_SPLIT)

ModuleType = self._GetModuleType()

Also, if you would like to profile the build process, you can apply this patch.

diff --git a/BaseTools/Source/Python/build/build.py b/BaseTools/Source/Python/build/build.py
index 51fb1f433e..396729efd9 100755
--- a/BaseTools/Source/Python/build/build.py
+++ b/BaseTools/Source/Python/build/build.py
@@ -2778,12 +2778,18 @@ def Main():
Log_Agent.join()
return ReturnCode

+import cProfile
+
if __name__ == '__main__':
try:
mp.set_start_method('spawn')
except:
pass
- r = Main()
+
+ with cProfile.Profile() as pr:
+ r = Main()
+ pr.print_stats('tottime')
+
## 0-127 is a safe return range, and 1 is a standard default error
if r < 0 or r > 127: r = 1
sys.exit(r)