public inbox for devel@edk2.groups.io
 help / color / mirror / Atom feed
* [Patch] BaseTools: Optimize string concatenation
@ 2018-11-08 10:16 BobCF
  2018-11-08 15:26 ` Carsey, Jaben
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: BobCF @ 2018-11-08 10:16 UTC (permalink / raw)
  To: edk2-devel; +Cc: Liming Gao, Jaben Carsey

https://bugzilla.tianocore.org/show_bug.cgi?id=1288

This patch is one of build tool performance improvement
series patches.

This patch is going to use join function instead of
string += string2 statement.

Current code use string += string2 in a loop to combine
a string. while creating a string list in a loop and using
"".join(stringlist) after the loop will be much faster.

Contributed-under: TianoCore Contribution Agreement 1.1
Signed-off-by: BobCF <bob.c.feng@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Jaben Carsey <jaben.carsey@intel.com>
---
 BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
 BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
 .../Source/Python/Workspace/InfBuildData.py   |  4 +-
 .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
 4 files changed, 44 insertions(+), 31 deletions(-)

diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py b/BaseTools/Source/Python/AutoGen/StrGather.py
index 361d499076..d34a9e9447 100644
--- a/BaseTools/Source/Python/AutoGen/StrGather.py
+++ b/BaseTools/Source/Python/AutoGen/StrGather.py
@@ -135,11 +135,11 @@ def AscToHexList(Ascii):
 # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
 #
 # @retval Str:           A string of .h file content
 #
 def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
-    Str = ''
+    Str = []
     ValueStartPtr = 60
     Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
     Str = WriteLine(Str, Line)
     Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
     Str = WriteLine(Str, Line)
@@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
                     Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
                 else:
                     Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
                 UnusedStr = WriteLine(UnusedStr, Line)
 
-    Str = ''.join([Str, UnusedStr])
+    Str.extend( UnusedStr)
 
     Str = WriteLine(Str, '')
     if IsCompatibleMode or UniGenCFlag:
         Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
-    return Str
+    return "".join(Str)
 
 ## Create a complete .h file
 #
 # Create a complet .h file with file header and file content
 #
@@ -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
 # @retval Str:           A string of complete .h file
 #
 def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
     HFile = WriteLine('', CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag))
 
-    return HFile
+    return "".join(HFile)
 
 ## Create a buffer to store all items in an array
 #
 # @param BinBuffer   Buffer to contain Binary data.
 # @param Array:      The array need to be formatted
@@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
 #
 def CreateArrayItem(Array, Width = 16):
     MaxLength = Width
     Index = 0
     Line = '  '
-    ArrayItem = ''
+    ArrayItem = []
 
     for Item in Array:
         if Index < MaxLength:
             Line = Line + Item + ',  '
             Index = Index + 1
@@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
             ArrayItem = WriteLine(ArrayItem, Line)
             Line = '  ' + Item + ',  '
             Index = 1
     ArrayItem = Write(ArrayItem, Line.rstrip())
 
-    return ArrayItem
+    return "".join(ArrayItem)
 
 ## CreateCFileStringValue
 #
 # Create a line with string value
 #
@@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
 
 def CreateCFileStringValue(Value):
     Value = [StringBlockType] + Value
     Str = WriteLine('', CreateArrayItem(Value))
 
-    return Str
+    return "".join(Str)
 
 ## GetFilteredLanguage
 #
 # apply get best language rules to the UNI language code list
 #
@@ -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
     #
     # Join package data
     #
     AllStr = Write(AllStr, Str)
 
-    return AllStr
+    return "".join(AllStr)
 
 ## Create end of .c file
 #
 # Create end of .c file
 #
@@ -465,11 +465,11 @@ def CreateCFileEnd():
 #
 def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
     CFile = ''
     CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
     CFile = WriteLine(CFile, CreateCFileEnd())
-    return CFile
+    return "".join(CFile)
 
 ## GetFileList
 #
 # Get a list for all files
 #
@@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, IncludeList, IncludePathList, Ski
 
 #
 # Write an item
 #
 def Write(Target, Item):
-    return ''.join([Target, Item])
+    if isinstance(Target,str):
+        Target = [Target]
+    if not Target:
+        Target = []
+    if isinstance(Item,list):
+        Target.extend(Item)
+    else:
+        Target.append(Item)
+    return Target
 
 #
 # Write an item with a break line
 #
 def WriteLine(Target, Item):
-    return ''.join([Target, Item, '\n'])
+    if isinstance(Target,str):
+        Target = [Target]
+    if not Target:
+        Target = []
+    if isinstance(Item, list):
+        Target.extend(Item)
+    else:
+        Target.append(Item)
+    Target.append('\n')
+    return Target
 
 # This acts like the main() function for the script, unless it is 'import'ed into another
 # script.
 if __name__ == '__main__':
     EdkLogger.info('start')
diff --git a/BaseTools/Source/Python/Common/Misc.py b/BaseTools/Source/Python/Common/Misc.py
index 80236db160..8dcbe141ae 100644
--- a/BaseTools/Source/Python/Common/Misc.py
+++ b/BaseTools/Source/Python/Common/Misc.py
@@ -777,21 +777,21 @@ class TemplateString(object):
 
             return "".join(StringList)
 
     ## Constructor
     def __init__(self, Template=None):
-        self.String = ''
+        self.String = []
         self.IsBinary = False
         self._Template = Template
         self._TemplateSectionList = self._Parse(Template)
 
     ## str() operator
     #
     #   @retval     string  The string replaced
     #
     def __str__(self):
-        return self.String
+        return "".join(self.String)
 
     ## Split the template string into fragments per the ${BEGIN} and ${END} flags
     #
     #   @retval     list    A list of TemplateString.Section objects
     #
@@ -835,13 +835,16 @@ class TemplateString(object):
     #   @param      Dictionary      The placeholder dictionaries
     #
     def Append(self, AppendString, Dictionary=None):
         if Dictionary:
             SectionList = self._Parse(AppendString)
-            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
+            self.String.append( "".join(S.Instantiate(Dictionary) for S in SectionList))
         else:
-            self.String += AppendString
+            if isinstance(AppendString,list):
+                self.String.extend(AppendString)
+            else:
+                self.String.append(AppendString)
 
     ## Replace the string template with dictionary of placeholders
     #
     #   @param      Dictionary      The placeholder dictionaries
     #
@@ -1741,27 +1744,21 @@ class PathClass(object):
     #
     # @retval False The two PathClass are different
     # @retval True  The two PathClass are the same
     #
     def __eq__(self, Other):
-        if isinstance(Other, type(self)):
-            return self.Path == Other.Path
-        else:
-            return self.Path == str(Other)
+        return self.Path == str(Other)
 
     ## Override __cmp__ function
     #
     # Customize the comparsion operation of two PathClass
     #
     # @retval 0     The two PathClass are different
     # @retval -1    The first PathClass is less than the second PathClass
     # @retval 1     The first PathClass is Bigger than the second PathClass
     def __cmp__(self, Other):
-        if isinstance(Other, type(self)):
-            OtherKey = Other.Path
-        else:
-            OtherKey = str(Other)
+        OtherKey = str(Other)
 
         SelfKey = self.Path
         if SelfKey == OtherKey:
             return 0
         elif SelfKey > OtherKey:
diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py b/BaseTools/Source/Python/Workspace/InfBuildData.py
index 44d44d24eb..d615cccdf7 100644
--- a/BaseTools/Source/Python/Workspace/InfBuildData.py
+++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
@@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
         for Record in RecordList:
             Lib = Record[0]
             Instance = Record[1]
             if Instance:
                 Instance = NormPath(Instance, self._Macros)
-            RetVal[Lib] = Instance
+                RetVal[Lib] = Instance
+            else:
+                RetVal[Lib] = None
         return RetVal
 
     ## Retrieve library names (for Edk.x style of modules)
     @cached_property
     def Libraries(self):
diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
index 8d8a3e2789..55d01fa4b2 100644
--- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
+++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
@@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
     while len(LibraryConsumerList) > 0:
         M = LibraryConsumerList.pop()
         for LibraryClassName in M.LibraryClasses:
             if LibraryClassName not in LibraryInstance:
                 # override library instance for this module
-                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
-                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
-                else:
-                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
-                if LibraryPath is None or LibraryPath == "":
-                    LibraryPath = M.LibraryClasses[LibraryClassName]
-                    if LibraryPath is None or LibraryPath == "":
+                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
+                if LibraryPath is None:
+                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
+                    if LibraryPath is None:
                         if FileName:
                             EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
                                             "Instance of library class [%s] is not found" % LibraryClassName,
                                             File=FileName,
                                             ExtraData="in [%s] [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
-- 
2.19.1.windows.1



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-08 10:16 [Patch] BaseTools: Optimize string concatenation BobCF
@ 2018-11-08 15:26 ` Carsey, Jaben
  2018-11-08 16:40 ` Kinney, Michael D
  2018-11-08 16:52 ` Leif Lindholm
  2 siblings, 0 replies; 18+ messages in thread
From: Carsey, Jaben @ 2018-11-08 15:26 UTC (permalink / raw)
  To: Feng, Bob C, edk2-devel@lists.01.org; +Cc: Gao, Liming

Reviewed-by: Jaben Carsey <jaben.carsey@intel.com>

> -----Original Message-----
> From: Feng, Bob C
> Sent: Thursday, November 08, 2018 2:16 AM
> To: edk2-devel@lists.01.org
> Cc: Gao, Liming <liming.gao@intel.com>; Carsey, Jaben
> <jaben.carsey@intel.com>
> Subject: [Patch] BaseTools: Optimize string concatenation
> Importance: High
> 
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> 
> This patch is one of build tool performance improvement
> series patches.
> 
> This patch is going to use join function instead of
> string += string2 statement.
> 
> Current code use string += string2 in a loop to combine
> a string. while creating a string list in a loop and using
> "".join(stringlist) after the loop will be much faster.
> 
> Contributed-under: TianoCore Contribution Agreement 1.1
> Signed-off-by: BobCF <bob.c.feng@intel.com>
> Cc: Liming Gao <liming.gao@intel.com>
> Cc: Jaben Carsey <jaben.carsey@intel.com>
> ---
>  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++----
> --
>  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
>  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
>  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
>  4 files changed, 44 insertions(+), 31 deletions(-)
> 
> diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py
> b/BaseTools/Source/Python/AutoGen/StrGather.py
> index 361d499076..d34a9e9447 100644
> --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
>  # @param UniGenCFlag      UniString is generated into AutoGen C file when it
> is set to True
>  #
>  # @retval Str:           A string of .h file content
>  #
>  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode,
> UniGenCFlag):
> -    Str = ''
> +    Str = []
>      ValueStartPtr = 60
>      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME
> + ' ' * (ValueStartPtr - len(DEFINE_STR +
> LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) +
> COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
>      Line = COMMENT_DEFINE_STR + ' ' +
> PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr -
> len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) +
> DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
> @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniGenCFlag):
>                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' +
> DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
>                  else:
>                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr -
> len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) +
> COMMENT_NOT_REFERENCED
>                  UnusedStr = WriteLine(UnusedStr, Line)
> 
> -    Str = ''.join([Str, UnusedStr])
> +    Str.extend( UnusedStr)
> 
>      Str = WriteLine(Str, '')
>      if IsCompatibleMode or UniGenCFlag:
>          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> -    return Str
> +    return "".join(Str)
> 
>  ## Create a complete .h file
>  #
>  # Create a complet .h file with file header and file content
>  #
> @@ -185,11 +185,11 @@ def CreateHFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniGenCFlag):
>  # @retval Str:           A string of complete .h file
>  #
>  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode,
> UniGenCFlag):
>      HFile = WriteLine('', CreateHFileContent(BaseName, UniObjectClass,
> IsCompatibleMode, UniGenCFlag))
> 
> -    return HFile
> +    return "".join(HFile)
> 
>  ## Create a buffer to store all items in an array
>  #
>  # @param BinBuffer   Buffer to contain Binary data.
>  # @param Array:      The array need to be formatted
> @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
>  #
>  def CreateArrayItem(Array, Width = 16):
>      MaxLength = Width
>      Index = 0
>      Line = '  '
> -    ArrayItem = ''
> +    ArrayItem = []
> 
>      for Item in Array:
>          if Index < MaxLength:
>              Line = Line + Item + ',  '
>              Index = Index + 1
> @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
>              ArrayItem = WriteLine(ArrayItem, Line)
>              Line = '  ' + Item + ',  '
>              Index = 1
>      ArrayItem = Write(ArrayItem, Line.rstrip())
> 
> -    return ArrayItem
> +    return "".join(ArrayItem)
> 
>  ## CreateCFileStringValue
>  #
>  # Create a line with string value
>  #
> @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
> 
>  def CreateCFileStringValue(Value):
>      Value = [StringBlockType] + Value
>      Str = WriteLine('', CreateArrayItem(Value))
> 
> -    return Str
> +    return "".join(Str)
> 
>  ## GetFilteredLanguage
>  #
>  # apply get best language rules to the UNI language code list
>  #
> @@ -438,11 +438,11 @@ def CreateCFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniBinBuffer,
>      #
>      # Join package data
>      #
>      AllStr = Write(AllStr, Str)
> 
> -    return AllStr
> +    return "".join(AllStr)
> 
>  ## Create end of .c file
>  #
>  # Create end of .c file
>  #
> @@ -465,11 +465,11 @@ def CreateCFileEnd():
>  #
>  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
>      CFile = ''
>      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass,
> IsCompatibleMode, None, FilterInfo))
>      CFile = WriteLine(CFile, CreateCFileEnd())
> -    return CFile
> +    return "".join(CFile)
> 
>  ## GetFileList
>  #
>  # Get a list for all files
>  #
> @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList,
> IncludeList, IncludePathList, Ski
> 
>  #
>  # Write an item
>  #
>  def Write(Target, Item):
> -    return ''.join([Target, Item])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item,list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    return Target
> 
>  #
>  # Write an item with a break line
>  #
>  def WriteLine(Target, Item):
> -    return ''.join([Target, Item, '\n'])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item, list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    Target.append('\n')
> +    return Target
> 
>  # This acts like the main() function for the script, unless it is 'import'ed into
> another
>  # script.
>  if __name__ == '__main__':
>      EdkLogger.info('start')
> diff --git a/BaseTools/Source/Python/Common/Misc.py
> b/BaseTools/Source/Python/Common/Misc.py
> index 80236db160..8dcbe141ae 100644
> --- a/BaseTools/Source/Python/Common/Misc.py
> +++ b/BaseTools/Source/Python/Common/Misc.py
> @@ -777,21 +777,21 @@ class TemplateString(object):
> 
>              return "".join(StringList)
> 
>      ## Constructor
>      def __init__(self, Template=None):
> -        self.String = ''
> +        self.String = []
>          self.IsBinary = False
>          self._Template = Template
>          self._TemplateSectionList = self._Parse(Template)
> 
>      ## str() operator
>      #
>      #   @retval     string  The string replaced
>      #
>      def __str__(self):
> -        return self.String
> +        return "".join(self.String)
> 
>      ## Split the template string into fragments per the ${BEGIN} and ${END}
> flags
>      #
>      #   @retval     list    A list of TemplateString.Section objects
>      #
> @@ -835,13 +835,16 @@ class TemplateString(object):
>      #   @param      Dictionary      The placeholder dictionaries
>      #
>      def Append(self, AppendString, Dictionary=None):
>          if Dictionary:
>              SectionList = self._Parse(AppendString)
> -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> +            self.String.append( "".join(S.Instantiate(Dictionary) for S in
> SectionList))
>          else:
> -            self.String += AppendString
> +            if isinstance(AppendString,list):
> +                self.String.extend(AppendString)
> +            else:
> +                self.String.append(AppendString)
> 
>      ## Replace the string template with dictionary of placeholders
>      #
>      #   @param      Dictionary      The placeholder dictionaries
>      #
> @@ -1741,27 +1744,21 @@ class PathClass(object):
>      #
>      # @retval False The two PathClass are different
>      # @retval True  The two PathClass are the same
>      #
>      def __eq__(self, Other):
> -        if isinstance(Other, type(self)):
> -            return self.Path == Other.Path
> -        else:
> -            return self.Path == str(Other)
> +        return self.Path == str(Other)
> 
>      ## Override __cmp__ function
>      #
>      # Customize the comparsion operation of two PathClass
>      #
>      # @retval 0     The two PathClass are different
>      # @retval -1    The first PathClass is less than the second PathClass
>      # @retval 1     The first PathClass is Bigger than the second PathClass
>      def __cmp__(self, Other):
> -        if isinstance(Other, type(self)):
> -            OtherKey = Other.Path
> -        else:
> -            OtherKey = str(Other)
> +        OtherKey = str(Other)
> 
>          SelfKey = self.Path
>          if SelfKey == OtherKey:
>              return 0
>          elif SelfKey > OtherKey:
> diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py
> b/BaseTools/Source/Python/Workspace/InfBuildData.py
> index 44d44d24eb..d615cccdf7 100644
> --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
>          for Record in RecordList:
>              Lib = Record[0]
>              Instance = Record[1]
>              if Instance:
>                  Instance = NormPath(Instance, self._Macros)
> -            RetVal[Lib] = Instance
> +                RetVal[Lib] = Instance
> +            else:
> +                RetVal[Lib] = None
>          return RetVal
> 
>      ## Retrieve library names (for Edk.x style of modules)
>      @cached_property
>      def Libraries(self):
> diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> index 8d8a3e2789..55d01fa4b2 100644
> --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform,
> BuildDatabase, Arch, Target, Toolcha
>      while len(LibraryConsumerList) > 0:
>          M = LibraryConsumerList.pop()
>          for LibraryClassName in M.LibraryClasses:
>              if LibraryClassName not in LibraryInstance:
>                  # override library instance for this module
> -                if LibraryClassName in
> Platform.Modules[str(Module)].LibraryClasses:
> -                    LibraryPath =
> Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> -                else:
> -                    LibraryPath = Platform.LibraryClasses[LibraryClassName,
> ModuleType]
> -                if LibraryPath is None or LibraryPath == "":
> -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> -                    if LibraryPath is None or LibraryPath == "":
> +                LibraryPath =
> Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platfor
> m.LibraryClasses[LibraryClassName, ModuleType])
> +                if LibraryPath is None:
> +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> +                    if LibraryPath is None:
>                          if FileName:
>                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
>                                              "Instance of library class [%s] is not found" %
> LibraryClassName,
>                                              File=FileName,
>                                              ExtraData="in [%s] [%s]\n\tconsumed by module
> [%s]" % (str(M), Arch, str(Module)))
> --
> 2.19.1.windows.1



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-08 10:16 [Patch] BaseTools: Optimize string concatenation BobCF
  2018-11-08 15:26 ` Carsey, Jaben
@ 2018-11-08 16:40 ` Kinney, Michael D
  2018-11-09 14:16   ` Gao, Liming
  2018-11-08 16:52 ` Leif Lindholm
  2 siblings, 1 reply; 18+ messages in thread
From: Kinney, Michael D @ 2018-11-08 16:40 UTC (permalink / raw)
  To: Feng, Bob C, edk2-devel@lists.01.org, Kinney, Michael D
  Cc: Carsey, Jaben, Gao, Liming

Bob,

Is this for edk2/master or for the Python 3 conversion in the
edk2-staging branch?  If it is for the edk-staging branch, then
the Subject is not correct.

Thanks,

Mike

> -----Original Message-----
> From: edk2-devel [mailto:edk2-devel-
> bounces@lists.01.org] On Behalf Of BobCF
> Sent: Thursday, November 8, 2018 2:16 AM
> To: edk2-devel@lists.01.org
> Cc: Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming
> <liming.gao@intel.com>
> Subject: [edk2] [Patch] BaseTools: Optimize string
> concatenation
> 
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> 
> This patch is one of build tool performance improvement
> series patches.
> 
> This patch is going to use join function instead of
> string += string2 statement.
> 
> Current code use string += string2 in a loop to combine
> a string. while creating a string list in a loop and
> using
> "".join(stringlist) after the loop will be much faster.
> 
> Contributed-under: TianoCore Contribution Agreement 1.1
> Signed-off-by: BobCF <bob.c.feng@intel.com>
> Cc: Liming Gao <liming.gao@intel.com>
> Cc: Jaben Carsey <jaben.carsey@intel.com>
> ---
>  BaseTools/Source/Python/AutoGen/StrGather.py  | 39
> +++++++++++++------
>  BaseTools/Source/Python/Common/Misc.py        | 21
> +++++-----
>  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
>  .../Python/Workspace/WorkspaceCommon.py       | 11 ++-
> ---
>  4 files changed, 44 insertions(+), 31 deletions(-)
> 
> diff --git
> a/BaseTools/Source/Python/AutoGen/StrGather.py
> b/BaseTools/Source/Python/AutoGen/StrGather.py
> index 361d499076..d34a9e9447 100644
> --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
>  # @param UniGenCFlag      UniString is generated into
> AutoGen C file when it is set to True
>  #
>  # @retval Str:           A string of .h file content
>  #
>  def CreateHFileContent(BaseName, UniObjectClass,
> IsCompatibleMode, UniGenCFlag):
> -    Str = ''
> +    Str = []
>      ValueStartPtr = 60
>      Line = COMMENT_DEFINE_STR + ' ' +
> LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr -
> len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) +
> DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
>      Line = COMMENT_DEFINE_STR + ' ' +
> PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' *
> (ValueStartPtr - len(DEFINE_STR +
> PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1,
> 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
> @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniGenCFlag):
>                      Line = COMMENT_DEFINE_STR + ' ' +
> Name + ' ' + DecToHexStr(Token, 4) +
> COMMENT_NOT_REFERENCED
>                  else:
>                      Line = COMMENT_DEFINE_STR + ' ' +
> Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) +
> DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
>                  UnusedStr = WriteLine(UnusedStr, Line)
> 
> -    Str = ''.join([Str, UnusedStr])
> +    Str.extend( UnusedStr)
> 
>      Str = WriteLine(Str, '')
>      if IsCompatibleMode or UniGenCFlag:
>          Str = WriteLine(Str, 'extern unsigned char ' +
> BaseName + 'Strings[];')
> -    return Str
> +    return "".join(Str)
> 
>  ## Create a complete .h file
>  #
>  # Create a complet .h file with file header and file
> content
>  #
> @@ -185,11 +185,11 @@ def CreateHFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniGenCFlag):
>  # @retval Str:           A string of complete .h file
>  #
>  def CreateHFile(BaseName, UniObjectClass,
> IsCompatibleMode, UniGenCFlag):
>      HFile = WriteLine('', CreateHFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniGenCFlag))
> 
> -    return HFile
> +    return "".join(HFile)
> 
>  ## Create a buffer to store all items in an array
>  #
>  # @param BinBuffer   Buffer to contain Binary data.
>  # @param Array:      The array need to be formatted
> @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer,
> Array):
>  #
>  def CreateArrayItem(Array, Width = 16):
>      MaxLength = Width
>      Index = 0
>      Line = '  '
> -    ArrayItem = ''
> +    ArrayItem = []
> 
>      for Item in Array:
>          if Index < MaxLength:
>              Line = Line + Item + ',  '
>              Index = Index + 1
> @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width
> = 16):
>              ArrayItem = WriteLine(ArrayItem, Line)
>              Line = '  ' + Item + ',  '
>              Index = 1
>      ArrayItem = Write(ArrayItem, Line.rstrip())
> 
> -    return ArrayItem
> +    return "".join(ArrayItem)
> 
>  ## CreateCFileStringValue
>  #
>  # Create a line with string value
>  #
> @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width
> = 16):
> 
>  def CreateCFileStringValue(Value):
>      Value = [StringBlockType] + Value
>      Str = WriteLine('', CreateArrayItem(Value))
> 
> -    return Str
> +    return "".join(Str)
> 
>  ## GetFilteredLanguage
>  #
>  # apply get best language rules to the UNI language
> code list
>  #
> @@ -438,11 +438,11 @@ def CreateCFileContent(BaseName,
> UniObjectClass, IsCompatibleMode, UniBinBuffer,
>      #
>      # Join package data
>      #
>      AllStr = Write(AllStr, Str)
> 
> -    return AllStr
> +    return "".join(AllStr)
> 
>  ## Create end of .c file
>  #
>  # Create end of .c file
>  #
> @@ -465,11 +465,11 @@ def CreateCFileEnd():
>  #
>  def CreateCFile(BaseName, UniObjectClass,
> IsCompatibleMode, FilterInfo):
>      CFile = ''
>      CFile = WriteLine(CFile,
> CreateCFileContent(BaseName, UniObjectClass,
> IsCompatibleMode, None, FilterInfo))
>      CFile = WriteLine(CFile, CreateCFileEnd())
> -    return CFile
> +    return "".join(CFile)
> 
>  ## GetFileList
>  #
>  # Get a list for all files
>  #
> @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList,
> SourceFileList, IncludeList, IncludePathList, Ski
> 
>  #
>  # Write an item
>  #
>  def Write(Target, Item):
> -    return ''.join([Target, Item])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item,list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    return Target
> 
>  #
>  # Write an item with a break line
>  #
>  def WriteLine(Target, Item):
> -    return ''.join([Target, Item, '\n'])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item, list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    Target.append('\n')
> +    return Target
> 
>  # This acts like the main() function for the script,
> unless it is 'import'ed into another
>  # script.
>  if __name__ == '__main__':
>      EdkLogger.info('start')
> diff --git a/BaseTools/Source/Python/Common/Misc.py
> b/BaseTools/Source/Python/Common/Misc.py
> index 80236db160..8dcbe141ae 100644
> --- a/BaseTools/Source/Python/Common/Misc.py
> +++ b/BaseTools/Source/Python/Common/Misc.py
> @@ -777,21 +777,21 @@ class TemplateString(object):
> 
>              return "".join(StringList)
> 
>      ## Constructor
>      def __init__(self, Template=None):
> -        self.String = ''
> +        self.String = []
>          self.IsBinary = False
>          self._Template = Template
>          self._TemplateSectionList =
> self._Parse(Template)
> 
>      ## str() operator
>      #
>      #   @retval     string  The string replaced
>      #
>      def __str__(self):
> -        return self.String
> +        return "".join(self.String)
> 
>      ## Split the template string into fragments per
> the ${BEGIN} and ${END} flags
>      #
>      #   @retval     list    A list of
> TemplateString.Section objects
>      #
> @@ -835,13 +835,16 @@ class TemplateString(object):
>      #   @param      Dictionary      The placeholder
> dictionaries
>      #
>      def Append(self, AppendString, Dictionary=None):
>          if Dictionary:
>              SectionList = self._Parse(AppendString)
> -            self.String +=
> "".join(S.Instantiate(Dictionary) for S in SectionList)
> +            self.String.append(
> "".join(S.Instantiate(Dictionary) for S in
> SectionList))
>          else:
> -            self.String += AppendString
> +            if isinstance(AppendString,list):
> +                self.String.extend(AppendString)
> +            else:
> +                self.String.append(AppendString)
> 
>      ## Replace the string template with dictionary of
> placeholders
>      #
>      #   @param      Dictionary      The placeholder
> dictionaries
>      #
> @@ -1741,27 +1744,21 @@ class PathClass(object):
>      #
>      # @retval False The two PathClass are different
>      # @retval True  The two PathClass are the same
>      #
>      def __eq__(self, Other):
> -        if isinstance(Other, type(self)):
> -            return self.Path == Other.Path
> -        else:
> -            return self.Path == str(Other)
> +        return self.Path == str(Other)
> 
>      ## Override __cmp__ function
>      #
>      # Customize the comparsion operation of two
> PathClass
>      #
>      # @retval 0     The two PathClass are different
>      # @retval -1    The first PathClass is less than
> the second PathClass
>      # @retval 1     The first PathClass is Bigger than
> the second PathClass
>      def __cmp__(self, Other):
> -        if isinstance(Other, type(self)):
> -            OtherKey = Other.Path
> -        else:
> -            OtherKey = str(Other)
> +        OtherKey = str(Other)
> 
>          SelfKey = self.Path
>          if SelfKey == OtherKey:
>              return 0
>          elif SelfKey > OtherKey:
> diff --git
> a/BaseTools/Source/Python/Workspace/InfBuildData.py
> b/BaseTools/Source/Python/Workspace/InfBuildData.py
> index 44d44d24eb..d615cccdf7 100644
> --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> @@ -612,11 +612,13 @@ class
> InfBuildData(ModuleBuildClassObject):
>          for Record in RecordList:
>              Lib = Record[0]
>              Instance = Record[1]
>              if Instance:
>                  Instance = NormPath(Instance,
> self._Macros)
> -            RetVal[Lib] = Instance
> +                RetVal[Lib] = Instance
> +            else:
> +                RetVal[Lib] = None
>          return RetVal
> 
>      ## Retrieve library names (for Edk.x style of
> modules)
>      @cached_property
>      def Libraries(self):
> diff --git
> a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> index 8d8a3e2789..55d01fa4b2 100644
> ---
> a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> +++
> b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module,
> Platform, BuildDatabase, Arch, Target, Toolcha
>      while len(LibraryConsumerList) > 0:
>          M = LibraryConsumerList.pop()
>          for LibraryClassName in M.LibraryClasses:
>              if LibraryClassName not in
> LibraryInstance:
>                  # override library instance for this
> module
> -                if LibraryClassName in
> Platform.Modules[str(Module)].LibraryClasses:
> -                    LibraryPath =
> Platform.Modules[str(Module)].LibraryClasses[LibraryCla
> ssName]
> -                else:
> -                    LibraryPath =
> Platform.LibraryClasses[LibraryClassName, ModuleType]
> -                if LibraryPath is None or LibraryPath
> == "":
> -                    LibraryPath =
> M.LibraryClasses[LibraryClassName]
> -                    if LibraryPath is None or
> LibraryPath == "":
> +                LibraryPath =
> Platform.Modules[str(Module)].LibraryClasses.get(Librar
> yClassName,Platform.LibraryClasses[LibraryClassName,
> ModuleType])
> +                if LibraryPath is None:
> +                    LibraryPath =
> M.LibraryClasses.get(LibraryClassName)
> +                    if LibraryPath is None:
>                          if FileName:
>                              EdkLogger.error("build",
> RESOURCE_NOT_AVAILABLE,
>                                              "Instance
> of library class [%s] is not found" % LibraryClassName,
> 
> File=FileName,
> 
> ExtraData="in [%s] [%s]\n\tconsumed by module [%s]" %
> (str(M), Arch, str(Module)))
> --
> 2.19.1.windows.1
> 
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-08 10:16 [Patch] BaseTools: Optimize string concatenation BobCF
  2018-11-08 15:26 ` Carsey, Jaben
  2018-11-08 16:40 ` Kinney, Michael D
@ 2018-11-08 16:52 ` Leif Lindholm
  2018-11-09  3:25   ` Feng, Bob C
  2 siblings, 1 reply; 18+ messages in thread
From: Leif Lindholm @ 2018-11-08 16:52 UTC (permalink / raw)
  To: BobCF; +Cc: edk2-devel, Jaben Carsey, Liming Gao

On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> 
> This patch is one of build tool performance improvement
> series patches.
> 
> This patch is going to use join function instead of
> string += string2 statement.
> 
> Current code use string += string2 in a loop to combine
> a string. while creating a string list in a loop and using
> "".join(stringlist) after the loop will be much faster.

Do you have any numbers on the level of improvement seen?

Either for the individual scripts when called identically, or (if
measurable) on the build of an entire platform (say OvmfX64?).

Regards,

Leif

> Contributed-under: TianoCore Contribution Agreement 1.1
> Signed-off-by: BobCF <bob.c.feng@intel.com>
> Cc: Liming Gao <liming.gao@intel.com>
> Cc: Jaben Carsey <jaben.carsey@intel.com>
> ---
>  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
>  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
>  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
>  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
>  4 files changed, 44 insertions(+), 31 deletions(-)
> 
> diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py b/BaseTools/Source/Python/AutoGen/StrGather.py
> index 361d499076..d34a9e9447 100644
> --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
>  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
>  #
>  # @retval Str:           A string of .h file content
>  #
>  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> -    Str = ''
> +    Str = []
>      ValueStartPtr = 60
>      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
>      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
> @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
>                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
>                  else:
>                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
>                  UnusedStr = WriteLine(UnusedStr, Line)
>  
> -    Str = ''.join([Str, UnusedStr])
> +    Str.extend( UnusedStr)
>  
>      Str = WriteLine(Str, '')
>      if IsCompatibleMode or UniGenCFlag:
>          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> -    return Str
> +    return "".join(Str)
>  
>  ## Create a complete .h file
>  #
>  # Create a complet .h file with file header and file content
>  #
> @@ -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
>  # @retval Str:           A string of complete .h file
>  #
>  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
>      HFile = WriteLine('', CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag))
>  
> -    return HFile
> +    return "".join(HFile)
>  
>  ## Create a buffer to store all items in an array
>  #
>  # @param BinBuffer   Buffer to contain Binary data.
>  # @param Array:      The array need to be formatted
> @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
>  #
>  def CreateArrayItem(Array, Width = 16):
>      MaxLength = Width
>      Index = 0
>      Line = '  '
> -    ArrayItem = ''
> +    ArrayItem = []
>  
>      for Item in Array:
>          if Index < MaxLength:
>              Line = Line + Item + ',  '
>              Index = Index + 1
> @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
>              ArrayItem = WriteLine(ArrayItem, Line)
>              Line = '  ' + Item + ',  '
>              Index = 1
>      ArrayItem = Write(ArrayItem, Line.rstrip())
>  
> -    return ArrayItem
> +    return "".join(ArrayItem)
>  
>  ## CreateCFileStringValue
>  #
>  # Create a line with string value
>  #
> @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
>  
>  def CreateCFileStringValue(Value):
>      Value = [StringBlockType] + Value
>      Str = WriteLine('', CreateArrayItem(Value))
>  
> -    return Str
> +    return "".join(Str)
>  
>  ## GetFilteredLanguage
>  #
>  # apply get best language rules to the UNI language code list
>  #
> @@ -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
>      #
>      # Join package data
>      #
>      AllStr = Write(AllStr, Str)
>  
> -    return AllStr
> +    return "".join(AllStr)
>  
>  ## Create end of .c file
>  #
>  # Create end of .c file
>  #
> @@ -465,11 +465,11 @@ def CreateCFileEnd():
>  #
>  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
>      CFile = ''
>      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
>      CFile = WriteLine(CFile, CreateCFileEnd())
> -    return CFile
> +    return "".join(CFile)
>  
>  ## GetFileList
>  #
>  # Get a list for all files
>  #
> @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, IncludeList, IncludePathList, Ski
>  
>  #
>  # Write an item
>  #
>  def Write(Target, Item):
> -    return ''.join([Target, Item])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item,list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    return Target
>  
>  #
>  # Write an item with a break line
>  #
>  def WriteLine(Target, Item):
> -    return ''.join([Target, Item, '\n'])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item, list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    Target.append('\n')
> +    return Target
>  
>  # This acts like the main() function for the script, unless it is 'import'ed into another
>  # script.
>  if __name__ == '__main__':
>      EdkLogger.info('start')
> diff --git a/BaseTools/Source/Python/Common/Misc.py b/BaseTools/Source/Python/Common/Misc.py
> index 80236db160..8dcbe141ae 100644
> --- a/BaseTools/Source/Python/Common/Misc.py
> +++ b/BaseTools/Source/Python/Common/Misc.py
> @@ -777,21 +777,21 @@ class TemplateString(object):
>  
>              return "".join(StringList)
>  
>      ## Constructor
>      def __init__(self, Template=None):
> -        self.String = ''
> +        self.String = []
>          self.IsBinary = False
>          self._Template = Template
>          self._TemplateSectionList = self._Parse(Template)
>  
>      ## str() operator
>      #
>      #   @retval     string  The string replaced
>      #
>      def __str__(self):
> -        return self.String
> +        return "".join(self.String)
>  
>      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
>      #
>      #   @retval     list    A list of TemplateString.Section objects
>      #
> @@ -835,13 +835,16 @@ class TemplateString(object):
>      #   @param      Dictionary      The placeholder dictionaries
>      #
>      def Append(self, AppendString, Dictionary=None):
>          if Dictionary:
>              SectionList = self._Parse(AppendString)
> -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> +            self.String.append( "".join(S.Instantiate(Dictionary) for S in SectionList))
>          else:
> -            self.String += AppendString
> +            if isinstance(AppendString,list):
> +                self.String.extend(AppendString)
> +            else:
> +                self.String.append(AppendString)
>  
>      ## Replace the string template with dictionary of placeholders
>      #
>      #   @param      Dictionary      The placeholder dictionaries
>      #
> @@ -1741,27 +1744,21 @@ class PathClass(object):
>      #
>      # @retval False The two PathClass are different
>      # @retval True  The two PathClass are the same
>      #
>      def __eq__(self, Other):
> -        if isinstance(Other, type(self)):
> -            return self.Path == Other.Path
> -        else:
> -            return self.Path == str(Other)
> +        return self.Path == str(Other)
>  
>      ## Override __cmp__ function
>      #
>      # Customize the comparsion operation of two PathClass
>      #
>      # @retval 0     The two PathClass are different
>      # @retval -1    The first PathClass is less than the second PathClass
>      # @retval 1     The first PathClass is Bigger than the second PathClass
>      def __cmp__(self, Other):
> -        if isinstance(Other, type(self)):
> -            OtherKey = Other.Path
> -        else:
> -            OtherKey = str(Other)
> +        OtherKey = str(Other)
>  
>          SelfKey = self.Path
>          if SelfKey == OtherKey:
>              return 0
>          elif SelfKey > OtherKey:
> diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py b/BaseTools/Source/Python/Workspace/InfBuildData.py
> index 44d44d24eb..d615cccdf7 100644
> --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
>          for Record in RecordList:
>              Lib = Record[0]
>              Instance = Record[1]
>              if Instance:
>                  Instance = NormPath(Instance, self._Macros)
> -            RetVal[Lib] = Instance
> +                RetVal[Lib] = Instance
> +            else:
> +                RetVal[Lib] = None
>          return RetVal
>  
>      ## Retrieve library names (for Edk.x style of modules)
>      @cached_property
>      def Libraries(self):
> diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> index 8d8a3e2789..55d01fa4b2 100644
> --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
>      while len(LibraryConsumerList) > 0:
>          M = LibraryConsumerList.pop()
>          for LibraryClassName in M.LibraryClasses:
>              if LibraryClassName not in LibraryInstance:
>                  # override library instance for this module
> -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> -                else:
> -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> -                if LibraryPath is None or LibraryPath == "":
> -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> -                    if LibraryPath is None or LibraryPath == "":
> +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> +                if LibraryPath is None:
> +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> +                    if LibraryPath is None:
>                          if FileName:
>                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
>                                              "Instance of library class [%s] is not found" % LibraryClassName,
>                                              File=FileName,
>                                              ExtraData="in [%s] [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> -- 
> 2.19.1.windows.1
> 
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-08 16:52 ` Leif Lindholm
@ 2018-11-09  3:25   ` Feng, Bob C
  2018-11-09 11:48     ` Leif Lindholm
  0 siblings, 1 reply; 18+ messages in thread
From: Feng, Bob C @ 2018-11-09  3:25 UTC (permalink / raw)
  To: Leif Lindholm; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

Hi Leif,

Yes. I should show the data.

My unites scripts is as below. The parameter lines is a string list which size is 43395. The test result is

''.join(String list) time:  0.042262
String += String time  :  3.822699

def TestPlus(lines):
    str_target = ""
    
    for line in lines:
        str_target += line
        
    return str_target

def TestJoin(lines):
    str_target = []
    
    for line in lines:
        str_target.append(line)
        
    return "".join(str_target)

def CompareStrCat():
    lines = GetStrings()
    print (len(lines))
    
    begin = time.perf_counter()
    for _ in range(10):
        TestJoin(lines)
    end = time.perf_counter() - begin
    print ("''.join(String list) time: %f" % end)
    
    begin = time.perf_counter()
    for _ in range(10):
        TestPlus(lines)
    end = time.perf_counter() - begin
    print ("String += String time: %f" % end)

For build OvmfX64, it's not very effective, it saves 2~3 second in Parse/AutoGen phase, because OvmfX64 is relatively simple. It does not enable much features such as Multiple SKU and structure PCD by default and there is no big size Autogen.c/Autogen.h/Makefile generated either. but for the complex platform, this patch will be much effective. The unites above simulates a real case that there is a 43395 lines of Autogen.c generated.

Since this patch mostly effect the Parser/AutoGen phase, I just use "build genmake" to show the improvement data. 
The final result for clean build is:
Current code:  17 seconds
After patch:      15 seconds

Details:
Current data:

d:\edk2 (master -> origin)
λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -t VS2015x86
Build environment: Windows-10-10.0.10240
Build start time: 10:12:32, Nov.09 2018

WORKSPACE        = d:\edk2
ECP_SOURCE       = d:\edk2\edkcompatibilitypkg
EDK_SOURCE       = d:\edk2\edkcompatibilitypkg
EFI_SOURCE       = d:\edk2\edkcompatibilitypkg
EDK_TOOLS_PATH   = d:\edk2\basetools
EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32
CONF_PATH        = d:\edk2\conf

Architecture(s)  = IA32 X64
Build target     = DEBUG
Toolchain        = VS2015x86

Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc
Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf

Processing meta-data ....... done!
Generating code . done!
Generating makefile . done!
Generating code .. done!
Generating makefile ...... done!

- Done -
Build end time: 10:12:49, Nov.09 2018
Build total time: 00:00:17

After applying this patch:

d:\edk2 (master -> origin)                                    
λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -
Build environment: Windows-10-10.0.10240                      
Build start time: 10:11:41, Nov.09 2018                       
                                                              
WORKSPACE        = d:\edk2                                    
ECP_SOURCE       = d:\edk2\edkcompatibilitypkg                
EDK_SOURCE       = d:\edk2\edkcompatibilitypkg                
EFI_SOURCE       = d:\edk2\edkcompatibilitypkg                
EDK_TOOLS_PATH   = d:\edk2\basetools                          
EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32                
CONF_PATH        = d:\edk2\conf                               
                                                              
                                                              
Architecture(s)  = IA32 X64                                   
Build target     = DEBUG                                      
Toolchain        = VS2015x86                                  
                                                              
Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc 
Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf 
                                                              
Processing meta-data ..... done!                              
Generating code . done!                                       
Generating makefile . done!                                   
Generating code .. done!                                      
Generating makefile ...... done!                              
                                                              
- Done -                                                      
Build end time: 10:11:56, Nov.09 2018                         
Build total time: 00:00:15                                    


Thanks,
Bob


-----Original Message-----
From: Leif Lindholm [mailto:leif.lindholm@linaro.org] 
Sent: Friday, November 9, 2018 12:53 AM
To: Feng, Bob C <bob.c.feng@intel.com>
Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation

On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> 
> This patch is one of build tool performance improvement series 
> patches.
> 
> This patch is going to use join function instead of string += string2 
> statement.
> 
> Current code use string += string2 in a loop to combine a string. 
> while creating a string list in a loop and using
> "".join(stringlist) after the loop will be much faster.

Do you have any numbers on the level of improvement seen?

Either for the individual scripts when called identically, or (if
measurable) on the build of an entire platform (say OvmfX64?).

Regards,

Leif

> Contributed-under: TianoCore Contribution Agreement 1.1
> Signed-off-by: BobCF <bob.c.feng@intel.com>
> Cc: Liming Gao <liming.gao@intel.com>
> Cc: Jaben Carsey <jaben.carsey@intel.com>
> ---
>  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
>  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
>  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
>  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
>  4 files changed, 44 insertions(+), 31 deletions(-)
> 
> diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py 
> b/BaseTools/Source/Python/AutoGen/StrGather.py
> index 361d499076..d34a9e9447 100644
> --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
>  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
>  #
>  # @retval Str:           A string of .h file content
>  #
>  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> -    Str = ''
> +    Str = []
>      ValueStartPtr = 60
>      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
>      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
>      Str = WriteLine(Str, Line)
> @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
>                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
>                  else:
>                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
>                  UnusedStr = WriteLine(UnusedStr, Line)
>  
> -    Str = ''.join([Str, UnusedStr])
> +    Str.extend( UnusedStr)
>  
>      Str = WriteLine(Str, '')
>      if IsCompatibleMode or UniGenCFlag:
>          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> -    return Str
> +    return "".join(Str)
>  
>  ## Create a complete .h file
>  #
>  # Create a complet .h file with file header and file content  # @@ 
> -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
>  # @retval Str:           A string of complete .h file
>  #
>  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
>      HFile = WriteLine('', CreateHFileContent(BaseName, 
> UniObjectClass, IsCompatibleMode, UniGenCFlag))
>  
> -    return HFile
> +    return "".join(HFile)
>  
>  ## Create a buffer to store all items in an array  #
>  # @param BinBuffer   Buffer to contain Binary data.
>  # @param Array:      The array need to be formatted
> @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
>  #
>  def CreateArrayItem(Array, Width = 16):
>      MaxLength = Width
>      Index = 0
>      Line = '  '
> -    ArrayItem = ''
> +    ArrayItem = []
>  
>      for Item in Array:
>          if Index < MaxLength:
>              Line = Line + Item + ',  '
>              Index = Index + 1
> @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
>              ArrayItem = WriteLine(ArrayItem, Line)
>              Line = '  ' + Item + ',  '
>              Index = 1
>      ArrayItem = Write(ArrayItem, Line.rstrip())
>  
> -    return ArrayItem
> +    return "".join(ArrayItem)
>  
>  ## CreateCFileStringValue
>  #
>  # Create a line with string value
>  #
> @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
>  
>  def CreateCFileStringValue(Value):
>      Value = [StringBlockType] + Value
>      Str = WriteLine('', CreateArrayItem(Value))
>  
> -    return Str
> +    return "".join(Str)
>  
>  ## GetFilteredLanguage
>  #
>  # apply get best language rules to the UNI language code list  # @@ 
> -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
>      #
>      # Join package data
>      #
>      AllStr = Write(AllStr, Str)
>  
> -    return AllStr
> +    return "".join(AllStr)
>  
>  ## Create end of .c file
>  #
>  # Create end of .c file
>  #
> @@ -465,11 +465,11 @@ def CreateCFileEnd():
>  #
>  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
>      CFile = ''
>      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
>      CFile = WriteLine(CFile, CreateCFileEnd())
> -    return CFile
> +    return "".join(CFile)
>  
>  ## GetFileList
>  #
>  # Get a list for all files
>  #
> @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, 
> IncludeList, IncludePathList, Ski
>  
>  #
>  # Write an item
>  #
>  def Write(Target, Item):
> -    return ''.join([Target, Item])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item,list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    return Target
>  
>  #
>  # Write an item with a break line
>  #
>  def WriteLine(Target, Item):
> -    return ''.join([Target, Item, '\n'])
> +    if isinstance(Target,str):
> +        Target = [Target]
> +    if not Target:
> +        Target = []
> +    if isinstance(Item, list):
> +        Target.extend(Item)
> +    else:
> +        Target.append(Item)
> +    Target.append('\n')
> +    return Target
>  
>  # This acts like the main() function for the script, unless it is 
> 'import'ed into another  # script.
>  if __name__ == '__main__':
>      EdkLogger.info('start')
> diff --git a/BaseTools/Source/Python/Common/Misc.py 
> b/BaseTools/Source/Python/Common/Misc.py
> index 80236db160..8dcbe141ae 100644
> --- a/BaseTools/Source/Python/Common/Misc.py
> +++ b/BaseTools/Source/Python/Common/Misc.py
> @@ -777,21 +777,21 @@ class TemplateString(object):
>  
>              return "".join(StringList)
>  
>      ## Constructor
>      def __init__(self, Template=None):
> -        self.String = ''
> +        self.String = []
>          self.IsBinary = False
>          self._Template = Template
>          self._TemplateSectionList = self._Parse(Template)
>  
>      ## str() operator
>      #
>      #   @retval     string  The string replaced
>      #
>      def __str__(self):
> -        return self.String
> +        return "".join(self.String)
>  
>      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
>      #
>      #   @retval     list    A list of TemplateString.Section objects
>      #
> @@ -835,13 +835,16 @@ class TemplateString(object):
>      #   @param      Dictionary      The placeholder dictionaries
>      #
>      def Append(self, AppendString, Dictionary=None):
>          if Dictionary:
>              SectionList = self._Parse(AppendString)
> -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> +            self.String.append( "".join(S.Instantiate(Dictionary) for 
> + S in SectionList))
>          else:
> -            self.String += AppendString
> +            if isinstance(AppendString,list):
> +                self.String.extend(AppendString)
> +            else:
> +                self.String.append(AppendString)
>  
>      ## Replace the string template with dictionary of placeholders
>      #
>      #   @param      Dictionary      The placeholder dictionaries
>      #
> @@ -1741,27 +1744,21 @@ class PathClass(object):
>      #
>      # @retval False The two PathClass are different
>      # @retval True  The two PathClass are the same
>      #
>      def __eq__(self, Other):
> -        if isinstance(Other, type(self)):
> -            return self.Path == Other.Path
> -        else:
> -            return self.Path == str(Other)
> +        return self.Path == str(Other)
>  
>      ## Override __cmp__ function
>      #
>      # Customize the comparsion operation of two PathClass
>      #
>      # @retval 0     The two PathClass are different
>      # @retval -1    The first PathClass is less than the second PathClass
>      # @retval 1     The first PathClass is Bigger than the second PathClass
>      def __cmp__(self, Other):
> -        if isinstance(Other, type(self)):
> -            OtherKey = Other.Path
> -        else:
> -            OtherKey = str(Other)
> +        OtherKey = str(Other)
>  
>          SelfKey = self.Path
>          if SelfKey == OtherKey:
>              return 0
>          elif SelfKey > OtherKey:
> diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py 
> b/BaseTools/Source/Python/Workspace/InfBuildData.py
> index 44d44d24eb..d615cccdf7 100644
> --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
>          for Record in RecordList:
>              Lib = Record[0]
>              Instance = Record[1]
>              if Instance:
>                  Instance = NormPath(Instance, self._Macros)
> -            RetVal[Lib] = Instance
> +                RetVal[Lib] = Instance
> +            else:
> +                RetVal[Lib] = None
>          return RetVal
>  
>      ## Retrieve library names (for Edk.x style of modules)
>      @cached_property
>      def Libraries(self):
> diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py 
> b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> index 8d8a3e2789..55d01fa4b2 100644
> --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
>      while len(LibraryConsumerList) > 0:
>          M = LibraryConsumerList.pop()
>          for LibraryClassName in M.LibraryClasses:
>              if LibraryClassName not in LibraryInstance:
>                  # override library instance for this module
> -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> -                else:
> -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> -                if LibraryPath is None or LibraryPath == "":
> -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> -                    if LibraryPath is None or LibraryPath == "":
> +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> +                if LibraryPath is None:
> +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> +                    if LibraryPath is None:
>                          if FileName:
>                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
>                                              "Instance of library class [%s] is not found" % LibraryClassName,
>                                              File=FileName,
>                                              ExtraData="in [%s] 
> [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> --
> 2.19.1.windows.1
> 
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-09  3:25   ` Feng, Bob C
@ 2018-11-09 11:48     ` Leif Lindholm
  2018-11-11  0:41       ` Feng, Bob C
  0 siblings, 1 reply; 18+ messages in thread
From: Leif Lindholm @ 2018-11-09 11:48 UTC (permalink / raw)
  To: Feng, Bob C; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

Hi Bob,

On Fri, Nov 09, 2018 at 03:25:04AM +0000, Feng, Bob C wrote:
> Yes. I should show the data.
> 
> My unites scripts is as below. The parameter lines is a string list which size is 43395. The test result is
> 
> ''.join(String list) time:  0.042262
> String += String time  :  3.822699
> 
> def TestPlus(lines):
>     str_target = ""
>     
>     for line in lines:
>         str_target += line
>         
>     return str_target
> 
> def TestJoin(lines):
>     str_target = []
>     
>     for line in lines:
>         str_target.append(line)
>         
>     return "".join(str_target)
> 
> def CompareStrCat():
>     lines = GetStrings()
>     print (len(lines))
>     
>     begin = time.perf_counter()
>     for _ in range(10):
>         TestJoin(lines)
>     end = time.perf_counter() - begin
>     print ("''.join(String list) time: %f" % end)
>     
>     begin = time.perf_counter()
>     for _ in range(10):
>         TestPlus(lines)
>     end = time.perf_counter() - begin
>     print ("String += String time: %f" % end)
> 
> For build OvmfX64, it's not very effective, it saves 2~3 second in
> Parse/AutoGen phase, because OvmfX64 is relatively simple. It does
> not enable much features such as Multiple SKU and structure PCD by
> default and there is no big size Autogen.c/Autogen.h/Makefile
> generated either. but for the complex platform, this patch will be
> much effective. The unites above simulates a real case that there is
> a 43395 lines of Autogen.c generated.

I beg to differ. Shaving 2-3 seconds off the autogen phase is a
substantial improvement.

However, on my wimpy 24-core Cortex-A53 system, the effect is not
noticeable (fluctuates between 1:56-1:58 whether with or without this
patch).

And even on my x86 workstation, I see no measurable difference (12-13s).
What is the hardware you are benchmarking on?

It does not appear to be detrimental to performance on any of my
platforms, but I would like to see some more measurements, and I would
like to see that logged with some more detail in bugzilla.

Regards,

Leif

> Since this patch mostly effect the Parser/AutoGen phase, I just use
> "build genmake" to show the improvement data.
> The final result for clean build is:
> Current code:  17 seconds
> After patch:      15 seconds
> 
> Details:
> Current data:
> 
> d:\edk2 (master -> origin)
> λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -t VS2015x86
> Build environment: Windows-10-10.0.10240
> Build start time: 10:12:32, Nov.09 2018
> 
> WORKSPACE        = d:\edk2
> ECP_SOURCE       = d:\edk2\edkcompatibilitypkg
> EDK_SOURCE       = d:\edk2\edkcompatibilitypkg
> EFI_SOURCE       = d:\edk2\edkcompatibilitypkg
> EDK_TOOLS_PATH   = d:\edk2\basetools
> EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32
> CONF_PATH        = d:\edk2\conf
> 
> Architecture(s)  = IA32 X64
> Build target     = DEBUG
> Toolchain        = VS2015x86
> 
> Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc
> Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf
> 
> Processing meta-data ....... done!
> Generating code . done!
> Generating makefile . done!
> Generating code .. done!
> Generating makefile ...... done!
> 
> - Done -
> Build end time: 10:12:49, Nov.09 2018
> Build total time: 00:00:17
> 
> After applying this patch:
> 
> d:\edk2 (master -> origin)                                    
> λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -
> Build environment: Windows-10-10.0.10240                      
> Build start time: 10:11:41, Nov.09 2018                       
>                                                               
> WORKSPACE        = d:\edk2                                    
> ECP_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EDK_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EFI_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EDK_TOOLS_PATH   = d:\edk2\basetools                          
> EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32                
> CONF_PATH        = d:\edk2\conf                               
>                                                               
>                                                               
> Architecture(s)  = IA32 X64                                   
> Build target     = DEBUG                                      
> Toolchain        = VS2015x86                                  
>                                                               
> Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc 
> Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf 
>                                                               
> Processing meta-data ..... done!                              
> Generating code . done!                                       
> Generating makefile . done!                                   
> Generating code .. done!                                      
> Generating makefile ...... done!                              
>                                                               
> - Done -                                                      
> Build end time: 10:11:56, Nov.09 2018                         
> Build total time: 00:00:15                                    
> 
> 
> Thanks,
> Bob
> 
> 
> -----Original Message-----
> From: Leif Lindholm [mailto:leif.lindholm@linaro.org] 
> Sent: Friday, November 9, 2018 12:53 AM
> To: Feng, Bob C <bob.c.feng@intel.com>
> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> > 
> > This patch is one of build tool performance improvement series 
> > patches.
> > 
> > This patch is going to use join function instead of string += string2 
> > statement.
> > 
> > Current code use string += string2 in a loop to combine a string. 
> > while creating a string list in a loop and using
> > "".join(stringlist) after the loop will be much faster.
> 
> Do you have any numbers on the level of improvement seen?
> 
> Either for the individual scripts when called identically, or (if
> measurable) on the build of an entire platform (say OvmfX64?).
> 
> Regards,
> 
> Leif
> 
> > Contributed-under: TianoCore Contribution Agreement 1.1
> > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > Cc: Liming Gao <liming.gao@intel.com>
> > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > ---
> >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
> >  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
> >  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
> >  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
> >  4 files changed, 44 insertions(+), 31 deletions(-)
> > 
> > diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py 
> > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > index 361d499076..d34a9e9447 100644
> > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> >  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
> >  #
> >  # @retval Str:           A string of .h file content
> >  #
> >  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > -    Str = ''
> > +    Str = []
> >      ValueStartPtr = 60
> >      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> >      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> > @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  else:
> >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  UnusedStr = WriteLine(UnusedStr, Line)
> >  
> > -    Str = ''.join([Str, UnusedStr])
> > +    Str.extend( UnusedStr)
> >  
> >      Str = WriteLine(Str, '')
> >      if IsCompatibleMode or UniGenCFlag:
> >          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> > -    return Str
> > +    return "".join(Str)
> >  
> >  ## Create a complete .h file
> >  #
> >  # Create a complet .h file with file header and file content  # @@ 
> > -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >  # @retval Str:           A string of complete .h file
> >  #
> >  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >      HFile = WriteLine('', CreateHFileContent(BaseName, 
> > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> >  
> > -    return HFile
> > +    return "".join(HFile)
> >  
> >  ## Create a buffer to store all items in an array  #
> >  # @param BinBuffer   Buffer to contain Binary data.
> >  # @param Array:      The array need to be formatted
> > @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
> >  #
> >  def CreateArrayItem(Array, Width = 16):
> >      MaxLength = Width
> >      Index = 0
> >      Line = '  '
> > -    ArrayItem = ''
> > +    ArrayItem = []
> >  
> >      for Item in Array:
> >          if Index < MaxLength:
> >              Line = Line + Item + ',  '
> >              Index = Index + 1
> > @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
> >              ArrayItem = WriteLine(ArrayItem, Line)
> >              Line = '  ' + Item + ',  '
> >              Index = 1
> >      ArrayItem = Write(ArrayItem, Line.rstrip())
> >  
> > -    return ArrayItem
> > +    return "".join(ArrayItem)
> >  
> >  ## CreateCFileStringValue
> >  #
> >  # Create a line with string value
> >  #
> > @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
> >  
> >  def CreateCFileStringValue(Value):
> >      Value = [StringBlockType] + Value
> >      Str = WriteLine('', CreateArrayItem(Value))
> >  
> > -    return Str
> > +    return "".join(Str)
> >  
> >  ## GetFilteredLanguage
> >  #
> >  # apply get best language rules to the UNI language code list  # @@ 
> > -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
> >      #
> >      # Join package data
> >      #
> >      AllStr = Write(AllStr, Str)
> >  
> > -    return AllStr
> > +    return "".join(AllStr)
> >  
> >  ## Create end of .c file
> >  #
> >  # Create end of .c file
> >  #
> > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> >  #
> >  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
> >      CFile = ''
> >      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
> >      CFile = WriteLine(CFile, CreateCFileEnd())
> > -    return CFile
> > +    return "".join(CFile)
> >  
> >  ## GetFileList
> >  #
> >  # Get a list for all files
> >  #
> > @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, 
> > IncludeList, IncludePathList, Ski
> >  
> >  #
> >  # Write an item
> >  #
> >  def Write(Target, Item):
> > -    return ''.join([Target, Item])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item,list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    return Target
> >  
> >  #
> >  # Write an item with a break line
> >  #
> >  def WriteLine(Target, Item):
> > -    return ''.join([Target, Item, '\n'])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item, list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    Target.append('\n')
> > +    return Target
> >  
> >  # This acts like the main() function for the script, unless it is 
> > 'import'ed into another  # script.
> >  if __name__ == '__main__':
> >      EdkLogger.info('start')
> > diff --git a/BaseTools/Source/Python/Common/Misc.py 
> > b/BaseTools/Source/Python/Common/Misc.py
> > index 80236db160..8dcbe141ae 100644
> > --- a/BaseTools/Source/Python/Common/Misc.py
> > +++ b/BaseTools/Source/Python/Common/Misc.py
> > @@ -777,21 +777,21 @@ class TemplateString(object):
> >  
> >              return "".join(StringList)
> >  
> >      ## Constructor
> >      def __init__(self, Template=None):
> > -        self.String = ''
> > +        self.String = []
> >          self.IsBinary = False
> >          self._Template = Template
> >          self._TemplateSectionList = self._Parse(Template)
> >  
> >      ## str() operator
> >      #
> >      #   @retval     string  The string replaced
> >      #
> >      def __str__(self):
> > -        return self.String
> > +        return "".join(self.String)
> >  
> >      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
> >      #
> >      #   @retval     list    A list of TemplateString.Section objects
> >      #
> > @@ -835,13 +835,16 @@ class TemplateString(object):
> >      #   @param      Dictionary      The placeholder dictionaries
> >      #
> >      def Append(self, AppendString, Dictionary=None):
> >          if Dictionary:
> >              SectionList = self._Parse(AppendString)
> > -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> > +            self.String.append( "".join(S.Instantiate(Dictionary) for 
> > + S in SectionList))
> >          else:
> > -            self.String += AppendString
> > +            if isinstance(AppendString,list):
> > +                self.String.extend(AppendString)
> > +            else:
> > +                self.String.append(AppendString)
> >  
> >      ## Replace the string template with dictionary of placeholders
> >      #
> >      #   @param      Dictionary      The placeholder dictionaries
> >      #
> > @@ -1741,27 +1744,21 @@ class PathClass(object):
> >      #
> >      # @retval False The two PathClass are different
> >      # @retval True  The two PathClass are the same
> >      #
> >      def __eq__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            return self.Path == Other.Path
> > -        else:
> > -            return self.Path == str(Other)
> > +        return self.Path == str(Other)
> >  
> >      ## Override __cmp__ function
> >      #
> >      # Customize the comparsion operation of two PathClass
> >      #
> >      # @retval 0     The two PathClass are different
> >      # @retval -1    The first PathClass is less than the second PathClass
> >      # @retval 1     The first PathClass is Bigger than the second PathClass
> >      def __cmp__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            OtherKey = Other.Path
> > -        else:
> > -            OtherKey = str(Other)
> > +        OtherKey = str(Other)
> >  
> >          SelfKey = self.Path
> >          if SelfKey == OtherKey:
> >              return 0
> >          elif SelfKey > OtherKey:
> > diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py 
> > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > index 44d44d24eb..d615cccdf7 100644
> > --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
> >          for Record in RecordList:
> >              Lib = Record[0]
> >              Instance = Record[1]
> >              if Instance:
> >                  Instance = NormPath(Instance, self._Macros)
> > -            RetVal[Lib] = Instance
> > +                RetVal[Lib] = Instance
> > +            else:
> > +                RetVal[Lib] = None
> >          return RetVal
> >  
> >      ## Retrieve library names (for Edk.x style of modules)
> >      @cached_property
> >      def Libraries(self):
> > diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py 
> > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > index 8d8a3e2789..55d01fa4b2 100644
> > --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
> >      while len(LibraryConsumerList) > 0:
> >          M = LibraryConsumerList.pop()
> >          for LibraryClassName in M.LibraryClasses:
> >              if LibraryClassName not in LibraryInstance:
> >                  # override library instance for this module
> > -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> > -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> > -                else:
> > -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> > -                if LibraryPath is None or LibraryPath == "":
> > -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> > -                    if LibraryPath is None or LibraryPath == "":
> > +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> > +                if LibraryPath is None:
> > +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> > +                    if LibraryPath is None:
> >                          if FileName:
> >                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
> >                                              "Instance of library class [%s] is not found" % LibraryClassName,
> >                                              File=FileName,
> >                                              ExtraData="in [%s] 
> > [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> > --
> > 2.19.1.windows.1
> > 
> > _______________________________________________
> > edk2-devel mailing list
> > edk2-devel@lists.01.org
> > https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-08 16:40 ` Kinney, Michael D
@ 2018-11-09 14:16   ` Gao, Liming
  2018-11-09 17:01     ` Kinney, Michael D
  0 siblings, 1 reply; 18+ messages in thread
From: Gao, Liming @ 2018-11-09 14:16 UTC (permalink / raw)
  To: Kinney, Michael D, Feng, Bob C, edk2-devel@lists.01.org; +Cc: Carsey, Jaben

Mike:
  This patch bases on edk2 master with Python27. Seemly, most people are not aware that Python3 migration (https://bugzilla.tianocore.org/show_bug.cgi?id=55) is added for next edk2-stable201903 tag planning. In BZ 55, I propose to keep Python2 and Python3 both, and new feature and patches are created based on Python3. I will send the mail to collect the feedback on this approach. 

Thanks
Liming
> -----Original Message-----
> From: Kinney, Michael D
> Sent: Friday, November 9, 2018 12:41 AM
> To: Feng, Bob C <bob.c.feng@intel.com>; edk2-devel@lists.01.org; Kinney, Michael D <michael.d.kinney@intel.com>
> Cc: Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: RE: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> Bob,
> 
> Is this for edk2/master or for the Python 3 conversion in the
> edk2-staging branch?  If it is for the edk-staging branch, then
> the Subject is not correct.
> 
> Thanks,
> 
> Mike
> 
> > -----Original Message-----
> > From: edk2-devel [mailto:edk2-devel-
> > bounces@lists.01.org] On Behalf Of BobCF
> > Sent: Thursday, November 8, 2018 2:16 AM
> > To: edk2-devel@lists.01.org
> > Cc: Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming
> > <liming.gao@intel.com>
> > Subject: [edk2] [Patch] BaseTools: Optimize string
> > concatenation
> >
> > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> >
> > This patch is one of build tool performance improvement
> > series patches.
> >
> > This patch is going to use join function instead of
> > string += string2 statement.
> >
> > Current code use string += string2 in a loop to combine
> > a string. while creating a string list in a loop and
> > using
> > "".join(stringlist) after the loop will be much faster.
> >
> > Contributed-under: TianoCore Contribution Agreement 1.1
> > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > Cc: Liming Gao <liming.gao@intel.com>
> > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > ---
> >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39
> > +++++++++++++------
> >  BaseTools/Source/Python/Common/Misc.py        | 21
> > +++++-----
> >  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
> >  .../Python/Workspace/WorkspaceCommon.py       | 11 ++-
> > ---
> >  4 files changed, 44 insertions(+), 31 deletions(-)
> >
> > diff --git
> > a/BaseTools/Source/Python/AutoGen/StrGather.py
> > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > index 361d499076..d34a9e9447 100644
> > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> >  # @param UniGenCFlag      UniString is generated into
> > AutoGen C file when it is set to True
> >  #
> >  # @retval Str:           A string of .h file content
> >  #
> >  def CreateHFileContent(BaseName, UniObjectClass,
> > IsCompatibleMode, UniGenCFlag):
> > -    Str = ''
> > +    Str = []
> >      ValueStartPtr = 60
> >      Line = COMMENT_DEFINE_STR + ' ' +
> > LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr -
> > len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) +
> > DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> >      Line = COMMENT_DEFINE_STR + ' ' +
> > PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' *
> > (ValueStartPtr - len(DEFINE_STR +
> > PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1,
> > 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> > @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName,
> > UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >                      Line = COMMENT_DEFINE_STR + ' ' +
> > Name + ' ' + DecToHexStr(Token, 4) +
> > COMMENT_NOT_REFERENCED
> >                  else:
> >                      Line = COMMENT_DEFINE_STR + ' ' +
> > Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) +
> > DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  UnusedStr = WriteLine(UnusedStr, Line)
> >
> > -    Str = ''.join([Str, UnusedStr])
> > +    Str.extend( UnusedStr)
> >
> >      Str = WriteLine(Str, '')
> >      if IsCompatibleMode or UniGenCFlag:
> >          Str = WriteLine(Str, 'extern unsigned char ' +
> > BaseName + 'Strings[];')
> > -    return Str
> > +    return "".join(Str)
> >
> >  ## Create a complete .h file
> >  #
> >  # Create a complet .h file with file header and file
> > content
> >  #
> > @@ -185,11 +185,11 @@ def CreateHFileContent(BaseName,
> > UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >  # @retval Str:           A string of complete .h file
> >  #
> >  def CreateHFile(BaseName, UniObjectClass,
> > IsCompatibleMode, UniGenCFlag):
> >      HFile = WriteLine('', CreateHFileContent(BaseName,
> > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> >
> > -    return HFile
> > +    return "".join(HFile)
> >
> >  ## Create a buffer to store all items in an array
> >  #
> >  # @param BinBuffer   Buffer to contain Binary data.
> >  # @param Array:      The array need to be formatted
> > @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer,
> > Array):
> >  #
> >  def CreateArrayItem(Array, Width = 16):
> >      MaxLength = Width
> >      Index = 0
> >      Line = '  '
> > -    ArrayItem = ''
> > +    ArrayItem = []
> >
> >      for Item in Array:
> >          if Index < MaxLength:
> >              Line = Line + Item + ',  '
> >              Index = Index + 1
> > @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width
> > = 16):
> >              ArrayItem = WriteLine(ArrayItem, Line)
> >              Line = '  ' + Item + ',  '
> >              Index = 1
> >      ArrayItem = Write(ArrayItem, Line.rstrip())
> >
> > -    return ArrayItem
> > +    return "".join(ArrayItem)
> >
> >  ## CreateCFileStringValue
> >  #
> >  # Create a line with string value
> >  #
> > @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width
> > = 16):
> >
> >  def CreateCFileStringValue(Value):
> >      Value = [StringBlockType] + Value
> >      Str = WriteLine('', CreateArrayItem(Value))
> >
> > -    return Str
> > +    return "".join(Str)
> >
> >  ## GetFilteredLanguage
> >  #
> >  # apply get best language rules to the UNI language
> > code list
> >  #
> > @@ -438,11 +438,11 @@ def CreateCFileContent(BaseName,
> > UniObjectClass, IsCompatibleMode, UniBinBuffer,
> >      #
> >      # Join package data
> >      #
> >      AllStr = Write(AllStr, Str)
> >
> > -    return AllStr
> > +    return "".join(AllStr)
> >
> >  ## Create end of .c file
> >  #
> >  # Create end of .c file
> >  #
> > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> >  #
> >  def CreateCFile(BaseName, UniObjectClass,
> > IsCompatibleMode, FilterInfo):
> >      CFile = ''
> >      CFile = WriteLine(CFile,
> > CreateCFileContent(BaseName, UniObjectClass,
> > IsCompatibleMode, None, FilterInfo))
> >      CFile = WriteLine(CFile, CreateCFileEnd())
> > -    return CFile
> > +    return "".join(CFile)
> >
> >  ## GetFileList
> >  #
> >  # Get a list for all files
> >  #
> > @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList,
> > SourceFileList, IncludeList, IncludePathList, Ski
> >
> >  #
> >  # Write an item
> >  #
> >  def Write(Target, Item):
> > -    return ''.join([Target, Item])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item,list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    return Target
> >
> >  #
> >  # Write an item with a break line
> >  #
> >  def WriteLine(Target, Item):
> > -    return ''.join([Target, Item, '\n'])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item, list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    Target.append('\n')
> > +    return Target
> >
> >  # This acts like the main() function for the script,
> > unless it is 'import'ed into another
> >  # script.
> >  if __name__ == '__main__':
> >      EdkLogger.info('start')
> > diff --git a/BaseTools/Source/Python/Common/Misc.py
> > b/BaseTools/Source/Python/Common/Misc.py
> > index 80236db160..8dcbe141ae 100644
> > --- a/BaseTools/Source/Python/Common/Misc.py
> > +++ b/BaseTools/Source/Python/Common/Misc.py
> > @@ -777,21 +777,21 @@ class TemplateString(object):
> >
> >              return "".join(StringList)
> >
> >      ## Constructor
> >      def __init__(self, Template=None):
> > -        self.String = ''
> > +        self.String = []
> >          self.IsBinary = False
> >          self._Template = Template
> >          self._TemplateSectionList =
> > self._Parse(Template)
> >
> >      ## str() operator
> >      #
> >      #   @retval     string  The string replaced
> >      #
> >      def __str__(self):
> > -        return self.String
> > +        return "".join(self.String)
> >
> >      ## Split the template string into fragments per
> > the ${BEGIN} and ${END} flags
> >      #
> >      #   @retval     list    A list of
> > TemplateString.Section objects
> >      #
> > @@ -835,13 +835,16 @@ class TemplateString(object):
> >      #   @param      Dictionary      The placeholder
> > dictionaries
> >      #
> >      def Append(self, AppendString, Dictionary=None):
> >          if Dictionary:
> >              SectionList = self._Parse(AppendString)
> > -            self.String +=
> > "".join(S.Instantiate(Dictionary) for S in SectionList)
> > +            self.String.append(
> > "".join(S.Instantiate(Dictionary) for S in
> > SectionList))
> >          else:
> > -            self.String += AppendString
> > +            if isinstance(AppendString,list):
> > +                self.String.extend(AppendString)
> > +            else:
> > +                self.String.append(AppendString)
> >
> >      ## Replace the string template with dictionary of
> > placeholders
> >      #
> >      #   @param      Dictionary      The placeholder
> > dictionaries
> >      #
> > @@ -1741,27 +1744,21 @@ class PathClass(object):
> >      #
> >      # @retval False The two PathClass are different
> >      # @retval True  The two PathClass are the same
> >      #
> >      def __eq__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            return self.Path == Other.Path
> > -        else:
> > -            return self.Path == str(Other)
> > +        return self.Path == str(Other)
> >
> >      ## Override __cmp__ function
> >      #
> >      # Customize the comparsion operation of two
> > PathClass
> >      #
> >      # @retval 0     The two PathClass are different
> >      # @retval -1    The first PathClass is less than
> > the second PathClass
> >      # @retval 1     The first PathClass is Bigger than
> > the second PathClass
> >      def __cmp__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            OtherKey = Other.Path
> > -        else:
> > -            OtherKey = str(Other)
> > +        OtherKey = str(Other)
> >
> >          SelfKey = self.Path
> >          if SelfKey == OtherKey:
> >              return 0
> >          elif SelfKey > OtherKey:
> > diff --git
> > a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > index 44d44d24eb..d615cccdf7 100644
> > --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > @@ -612,11 +612,13 @@ class
> > InfBuildData(ModuleBuildClassObject):
> >          for Record in RecordList:
> >              Lib = Record[0]
> >              Instance = Record[1]
> >              if Instance:
> >                  Instance = NormPath(Instance,
> > self._Macros)
> > -            RetVal[Lib] = Instance
> > +                RetVal[Lib] = Instance
> > +            else:
> > +                RetVal[Lib] = None
> >          return RetVal
> >
> >      ## Retrieve library names (for Edk.x style of
> > modules)
> >      @cached_property
> >      def Libraries(self):
> > diff --git
> > a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > index 8d8a3e2789..55d01fa4b2 100644
> > ---
> > a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > +++
> > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module,
> > Platform, BuildDatabase, Arch, Target, Toolcha
> >      while len(LibraryConsumerList) > 0:
> >          M = LibraryConsumerList.pop()
> >          for LibraryClassName in M.LibraryClasses:
> >              if LibraryClassName not in
> > LibraryInstance:
> >                  # override library instance for this
> > module
> > -                if LibraryClassName in
> > Platform.Modules[str(Module)].LibraryClasses:
> > -                    LibraryPath =
> > Platform.Modules[str(Module)].LibraryClasses[LibraryCla
> > ssName]
> > -                else:
> > -                    LibraryPath =
> > Platform.LibraryClasses[LibraryClassName, ModuleType]
> > -                if LibraryPath is None or LibraryPath
> > == "":
> > -                    LibraryPath =
> > M.LibraryClasses[LibraryClassName]
> > -                    if LibraryPath is None or
> > LibraryPath == "":
> > +                LibraryPath =
> > Platform.Modules[str(Module)].LibraryClasses.get(Librar
> > yClassName,Platform.LibraryClasses[LibraryClassName,
> > ModuleType])
> > +                if LibraryPath is None:
> > +                    LibraryPath =
> > M.LibraryClasses.get(LibraryClassName)
> > +                    if LibraryPath is None:
> >                          if FileName:
> >                              EdkLogger.error("build",
> > RESOURCE_NOT_AVAILABLE,
> >                                              "Instance
> > of library class [%s] is not found" % LibraryClassName,
> >
> > File=FileName,
> >
> > ExtraData="in [%s] [%s]\n\tconsumed by module [%s]" %
> > (str(M), Arch, str(Module)))
> > --
> > 2.19.1.windows.1
> >
> > _______________________________________________
> > edk2-devel mailing list
> > edk2-devel@lists.01.org
> > https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-09 14:16   ` Gao, Liming
@ 2018-11-09 17:01     ` Kinney, Michael D
  0 siblings, 0 replies; 18+ messages in thread
From: Kinney, Michael D @ 2018-11-09 17:01 UTC (permalink / raw)
  To: Gao, Liming, Feng, Bob C, edk2-devel@lists.01.org,
	Kinney, Michael D
  Cc: Carsey, Jaben

Liming,

If we can support both Python2 and Python3 equally,
then I agree that these types of performance enhancements
can be added to edk2/master after the stable tag.

Let's make sure we enter a BZ for each performance
improvement and as Leif has asked, put evidence of the
performance improvement in the BZ.

Mike

> -----Original Message-----
> From: Gao, Liming
> Sent: Friday, November 9, 2018 6:17 AM
> To: Kinney, Michael D <michael.d.kinney@intel.com>;
> Feng, Bob C <bob.c.feng@intel.com>; edk2-
> devel@lists.01.org
> Cc: Carsey, Jaben <jaben.carsey@intel.com>
> Subject: RE: [edk2] [Patch] BaseTools: Optimize string
> concatenation
> 
> Mike:
>   This patch bases on edk2 master with Python27.
> Seemly, most people are not aware that Python3
> migration
> (https://bugzilla.tianocore.org/show_bug.cgi?id=55) is
> added for next edk2-stable201903 tag planning. In BZ
> 55, I propose to keep Python2 and Python3 both, and new
> feature and patches are created based on Python3. I
> will send the mail to collect the feedback on this
> approach.
> 
> Thanks
> Liming
> > -----Original Message-----
> > From: Kinney, Michael D
> > Sent: Friday, November 9, 2018 12:41 AM
> > To: Feng, Bob C <bob.c.feng@intel.com>; edk2-
> devel@lists.01.org; Kinney, Michael D
> <michael.d.kinney@intel.com>
> > Cc: Carsey, Jaben <jaben.carsey@intel.com>; Gao,
> Liming <liming.gao@intel.com>
> > Subject: RE: [edk2] [Patch] BaseTools: Optimize
> string concatenation
> >
> > Bob,
> >
> > Is this for edk2/master or for the Python 3
> conversion in the
> > edk2-staging branch?  If it is for the edk-staging
> branch, then
> > the Subject is not correct.
> >
> > Thanks,
> >
> > Mike
> >
> > > -----Original Message-----
> > > From: edk2-devel [mailto:edk2-devel-
> > > bounces@lists.01.org] On Behalf Of BobCF
> > > Sent: Thursday, November 8, 2018 2:16 AM
> > > To: edk2-devel@lists.01.org
> > > Cc: Carsey, Jaben <jaben.carsey@intel.com>; Gao,
> Liming
> > > <liming.gao@intel.com>
> > > Subject: [edk2] [Patch] BaseTools: Optimize string
> > > concatenation
> > >
> > > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> > >
> > > This patch is one of build tool performance
> improvement
> > > series patches.
> > >
> > > This patch is going to use join function instead of
> > > string += string2 statement.
> > >
> > > Current code use string += string2 in a loop to
> combine
> > > a string. while creating a string list in a loop
> and
> > > using
> > > "".join(stringlist) after the loop will be much
> faster.
> > >
> > > Contributed-under: TianoCore Contribution Agreement
> 1.1
> > > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > > Cc: Liming Gao <liming.gao@intel.com>
> > > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > > ---
> > >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39
> > > +++++++++++++------
> > >  BaseTools/Source/Python/Common/Misc.py        | 21
> > > +++++-----
> > >  .../Source/Python/Workspace/InfBuildData.py   |  4
> +-
> > >  .../Python/Workspace/WorkspaceCommon.py       | 11
> ++-
> > > ---
> > >  4 files changed, 44 insertions(+), 31 deletions(-)
> > >
> > > diff --git
> > > a/BaseTools/Source/Python/AutoGen/StrGather.py
> > > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > > index 361d499076..d34a9e9447 100644
> > > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> > >  # @param UniGenCFlag      UniString is generated
> into
> > > AutoGen C file when it is set to True
> > >  #
> > >  # @retval Str:           A string of .h file
> content
> > >  #
> > >  def CreateHFileContent(BaseName, UniObjectClass,
> > > IsCompatibleMode, UniGenCFlag):
> > > -    Str = ''
> > > +    Str = []
> > >      ValueStartPtr = 60
> > >      Line = COMMENT_DEFINE_STR + ' ' +
> > > LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr -
> > > len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) +
> > > DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> > >      Str = WriteLine(Str, Line)
> > >      Line = COMMENT_DEFINE_STR + ' ' +
> > > PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' *
> > > (ValueStartPtr - len(DEFINE_STR +
> > > PRINTABLE_LANGUAGE_NAME_STRING_NAME)) +
> DecToHexStr(1,
> > > 4) + COMMENT_NOT_REFERENCED
> > >      Str = WriteLine(Str, Line)
> > > @@ -164,16 +164,16 @@ def
> CreateHFileContent(BaseName,
> > > UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >                      Line = COMMENT_DEFINE_STR + '
> ' +
> > > Name + ' ' + DecToHexStr(Token, 4) +
> > > COMMENT_NOT_REFERENCED
> > >                  else:
> > >                      Line = COMMENT_DEFINE_STR + '
> ' +
> > > Name + ' ' * (ValueStartPtr - len(DEFINE_STR +
> Name)) +
> > > DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> > >                  UnusedStr = WriteLine(UnusedStr,
> Line)
> > >
> > > -    Str = ''.join([Str, UnusedStr])
> > > +    Str.extend( UnusedStr)
> > >
> > >      Str = WriteLine(Str, '')
> > >      if IsCompatibleMode or UniGenCFlag:
> > >          Str = WriteLine(Str, 'extern unsigned char
> ' +
> > > BaseName + 'Strings[];')
> > > -    return Str
> > > +    return "".join(Str)
> > >
> > >  ## Create a complete .h file
> > >  #
> > >  # Create a complet .h file with file header and
> file
> > > content
> > >  #
> > > @@ -185,11 +185,11 @@ def
> CreateHFileContent(BaseName,
> > > UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >  # @retval Str:           A string of complete .h
> file
> > >  #
> > >  def CreateHFile(BaseName, UniObjectClass,
> > > IsCompatibleMode, UniGenCFlag):
> > >      HFile = WriteLine('',
> CreateHFileContent(BaseName,
> > > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> > >
> > > -    return HFile
> > > +    return "".join(HFile)
> > >
> > >  ## Create a buffer to store all items in an array
> > >  #
> > >  # @param BinBuffer   Buffer to contain Binary
> data.
> > >  # @param Array:      The array need to be
> formatted
> > > @@ -209,11 +209,11 @@ def
> CreateBinBuffer(BinBuffer,
> > > Array):
> > >  #
> > >  def CreateArrayItem(Array, Width = 16):
> > >      MaxLength = Width
> > >      Index = 0
> > >      Line = '  '
> > > -    ArrayItem = ''
> > > +    ArrayItem = []
> > >
> > >      for Item in Array:
> > >          if Index < MaxLength:
> > >              Line = Line + Item + ',  '
> > >              Index = Index + 1
> > > @@ -221,11 +221,11 @@ def CreateArrayItem(Array,
> Width
> > > = 16):
> > >              ArrayItem = WriteLine(ArrayItem, Line)
> > >              Line = '  ' + Item + ',  '
> > >              Index = 1
> > >      ArrayItem = Write(ArrayItem, Line.rstrip())
> > >
> > > -    return ArrayItem
> > > +    return "".join(ArrayItem)
> > >
> > >  ## CreateCFileStringValue
> > >  #
> > >  # Create a line with string value
> > >  #
> > > @@ -236,11 +236,11 @@ def CreateArrayItem(Array,
> Width
> > > = 16):
> > >
> > >  def CreateCFileStringValue(Value):
> > >      Value = [StringBlockType] + Value
> > >      Str = WriteLine('', CreateArrayItem(Value))
> > >
> > > -    return Str
> > > +    return "".join(Str)
> > >
> > >  ## GetFilteredLanguage
> > >  #
> > >  # apply get best language rules to the UNI
> language
> > > code list
> > >  #
> > > @@ -438,11 +438,11 @@ def
> CreateCFileContent(BaseName,
> > > UniObjectClass, IsCompatibleMode, UniBinBuffer,
> > >      #
> > >      # Join package data
> > >      #
> > >      AllStr = Write(AllStr, Str)
> > >
> > > -    return AllStr
> > > +    return "".join(AllStr)
> > >
> > >  ## Create end of .c file
> > >  #
> > >  # Create end of .c file
> > >  #
> > > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> > >  #
> > >  def CreateCFile(BaseName, UniObjectClass,
> > > IsCompatibleMode, FilterInfo):
> > >      CFile = ''
> > >      CFile = WriteLine(CFile,
> > > CreateCFileContent(BaseName, UniObjectClass,
> > > IsCompatibleMode, None, FilterInfo))
> > >      CFile = WriteLine(CFile, CreateCFileEnd())
> > > -    return CFile
> > > +    return "".join(CFile)
> > >
> > >  ## GetFileList
> > >  #
> > >  # Get a list for all files
> > >  #
> > > @@ -572,17 +572,34 @@ def
> GetStringFiles(UniFilList,
> > > SourceFileList, IncludeList, IncludePathList, Ski
> > >
> > >  #
> > >  # Write an item
> > >  #
> > >  def Write(Target, Item):
> > > -    return ''.join([Target, Item])
> > > +    if isinstance(Target,str):
> > > +        Target = [Target]
> > > +    if not Target:
> > > +        Target = []
> > > +    if isinstance(Item,list):
> > > +        Target.extend(Item)
> > > +    else:
> > > +        Target.append(Item)
> > > +    return Target
> > >
> > >  #
> > >  # Write an item with a break line
> > >  #
> > >  def WriteLine(Target, Item):
> > > -    return ''.join([Target, Item, '\n'])
> > > +    if isinstance(Target,str):
> > > +        Target = [Target]
> > > +    if not Target:
> > > +        Target = []
> > > +    if isinstance(Item, list):
> > > +        Target.extend(Item)
> > > +    else:
> > > +        Target.append(Item)
> > > +    Target.append('\n')
> > > +    return Target
> > >
> > >  # This acts like the main() function for the
> script,
> > > unless it is 'import'ed into another
> > >  # script.
> > >  if __name__ == '__main__':
> > >      EdkLogger.info('start')
> > > diff --git a/BaseTools/Source/Python/Common/Misc.py
> > > b/BaseTools/Source/Python/Common/Misc.py
> > > index 80236db160..8dcbe141ae 100644
> > > --- a/BaseTools/Source/Python/Common/Misc.py
> > > +++ b/BaseTools/Source/Python/Common/Misc.py
> > > @@ -777,21 +777,21 @@ class TemplateString(object):
> > >
> > >              return "".join(StringList)
> > >
> > >      ## Constructor
> > >      def __init__(self, Template=None):
> > > -        self.String = ''
> > > +        self.String = []
> > >          self.IsBinary = False
> > >          self._Template = Template
> > >          self._TemplateSectionList =
> > > self._Parse(Template)
> > >
> > >      ## str() operator
> > >      #
> > >      #   @retval     string  The string replaced
> > >      #
> > >      def __str__(self):
> > > -        return self.String
> > > +        return "".join(self.String)
> > >
> > >      ## Split the template string into fragments
> per
> > > the ${BEGIN} and ${END} flags
> > >      #
> > >      #   @retval     list    A list of
> > > TemplateString.Section objects
> > >      #
> > > @@ -835,13 +835,16 @@ class TemplateString(object):
> > >      #   @param      Dictionary      The
> placeholder
> > > dictionaries
> > >      #
> > >      def Append(self, AppendString,
> Dictionary=None):
> > >          if Dictionary:
> > >              SectionList =
> self._Parse(AppendString)
> > > -            self.String +=
> > > "".join(S.Instantiate(Dictionary) for S in
> SectionList)
> > > +            self.String.append(
> > > "".join(S.Instantiate(Dictionary) for S in
> > > SectionList))
> > >          else:
> > > -            self.String += AppendString
> > > +            if isinstance(AppendString,list):
> > > +                self.String.extend(AppendString)
> > > +            else:
> > > +                self.String.append(AppendString)
> > >
> > >      ## Replace the string template with dictionary
> of
> > > placeholders
> > >      #
> > >      #   @param      Dictionary      The
> placeholder
> > > dictionaries
> > >      #
> > > @@ -1741,27 +1744,21 @@ class PathClass(object):
> > >      #
> > >      # @retval False The two PathClass are
> different
> > >      # @retval True  The two PathClass are the same
> > >      #
> > >      def __eq__(self, Other):
> > > -        if isinstance(Other, type(self)):
> > > -            return self.Path == Other.Path
> > > -        else:
> > > -            return self.Path == str(Other)
> > > +        return self.Path == str(Other)
> > >
> > >      ## Override __cmp__ function
> > >      #
> > >      # Customize the comparsion operation of two
> > > PathClass
> > >      #
> > >      # @retval 0     The two PathClass are
> different
> > >      # @retval -1    The first PathClass is less
> than
> > > the second PathClass
> > >      # @retval 1     The first PathClass is Bigger
> than
> > > the second PathClass
> > >      def __cmp__(self, Other):
> > > -        if isinstance(Other, type(self)):
> > > -            OtherKey = Other.Path
> > > -        else:
> > > -            OtherKey = str(Other)
> > > +        OtherKey = str(Other)
> > >
> > >          SelfKey = self.Path
> > >          if SelfKey == OtherKey:
> > >              return 0
> > >          elif SelfKey > OtherKey:
> > > diff --git
> > > a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > index 44d44d24eb..d615cccdf7 100644
> > > ---
> a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > +++
> b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > @@ -612,11 +612,13 @@ class
> > > InfBuildData(ModuleBuildClassObject):
> > >          for Record in RecordList:
> > >              Lib = Record[0]
> > >              Instance = Record[1]
> > >              if Instance:
> > >                  Instance = NormPath(Instance,
> > > self._Macros)
> > > -            RetVal[Lib] = Instance
> > > +                RetVal[Lib] = Instance
> > > +            else:
> > > +                RetVal[Lib] = None
> > >          return RetVal
> > >
> > >      ## Retrieve library names (for Edk.x style of
> > > modules)
> > >      @cached_property
> > >      def Libraries(self):
> > > diff --git
> > >
> a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > >
> b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > index 8d8a3e2789..55d01fa4b2 100644
> > > ---
> > >
> a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > +++
> > >
> b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > @@ -126,17 +126,14 @@ def
> GetModuleLibInstances(Module,
> > > Platform, BuildDatabase, Arch, Target, Toolcha
> > >      while len(LibraryConsumerList) > 0:
> > >          M = LibraryConsumerList.pop()
> > >          for LibraryClassName in M.LibraryClasses:
> > >              if LibraryClassName not in
> > > LibraryInstance:
> > >                  # override library instance for
> this
> > > module
> > > -                if LibraryClassName in
> > > Platform.Modules[str(Module)].LibraryClasses:
> > > -                    LibraryPath =
> > >
> Platform.Modules[str(Module)].LibraryClasses[LibraryCla
> > > ssName]
> > > -                else:
> > > -                    LibraryPath =
> > > Platform.LibraryClasses[LibraryClassName,
> ModuleType]
> > > -                if LibraryPath is None or
> LibraryPath
> > > == "":
> > > -                    LibraryPath =
> > > M.LibraryClasses[LibraryClassName]
> > > -                    if LibraryPath is None or
> > > LibraryPath == "":
> > > +                LibraryPath =
> > >
> Platform.Modules[str(Module)].LibraryClasses.get(Librar
> > >
> yClassName,Platform.LibraryClasses[LibraryClassName,
> > > ModuleType])
> > > +                if LibraryPath is None:
> > > +                    LibraryPath =
> > > M.LibraryClasses.get(LibraryClassName)
> > > +                    if LibraryPath is None:
> > >                          if FileName:
> > >
> EdkLogger.error("build",
> > > RESOURCE_NOT_AVAILABLE,
> > >
> "Instance
> > > of library class [%s] is not found" %
> LibraryClassName,
> > >
> > > File=FileName,
> > >
> > > ExtraData="in [%s] [%s]\n\tconsumed by module [%s]"
> %
> > > (str(M), Arch, str(Module)))
> > > --
> > > 2.19.1.windows.1
> > >
> > > _______________________________________________
> > > edk2-devel mailing list
> > > edk2-devel@lists.01.org
> > > https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-09 11:48     ` Leif Lindholm
@ 2018-11-11  0:41       ` Feng, Bob C
  2018-12-05  2:51         ` Feng, Bob C
  0 siblings, 1 reply; 18+ messages in thread
From: Feng, Bob C @ 2018-11-11  0:41 UTC (permalink / raw)
  To: Leif Lindholm; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

Hi Leif,

I use my desktop to do the benchmark.
CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz
Memory: 16G
OS: Win10

I'll add the performance detailed data to BZ.

-Bob

-----Original Message-----
From: Leif Lindholm [mailto:leif.lindholm@linaro.org] 
Sent: Friday, November 9, 2018 7:49 PM
To: Feng, Bob C <bob.c.feng@intel.com>
Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation

Hi Bob,

On Fri, Nov 09, 2018 at 03:25:04AM +0000, Feng, Bob C wrote:
> Yes. I should show the data.
> 
> My unites scripts is as below. The parameter lines is a string list 
> which size is 43395. The test result is
> 
> ''.join(String list) time:  0.042262
> String += String time  :  3.822699
> 
> def TestPlus(lines):
>     str_target = ""
>     
>     for line in lines:
>         str_target += line
>         
>     return str_target
> 
> def TestJoin(lines):
>     str_target = []
>     
>     for line in lines:
>         str_target.append(line)
>         
>     return "".join(str_target)
> 
> def CompareStrCat():
>     lines = GetStrings()
>     print (len(lines))
>     
>     begin = time.perf_counter()
>     for _ in range(10):
>         TestJoin(lines)
>     end = time.perf_counter() - begin
>     print ("''.join(String list) time: %f" % end)
>     
>     begin = time.perf_counter()
>     for _ in range(10):
>         TestPlus(lines)
>     end = time.perf_counter() - begin
>     print ("String += String time: %f" % end)
> 
> For build OvmfX64, it's not very effective, it saves 2~3 second in 
> Parse/AutoGen phase, because OvmfX64 is relatively simple. It does not 
> enable much features such as Multiple SKU and structure PCD by default 
> and there is no big size Autogen.c/Autogen.h/Makefile generated 
> either. but for the complex platform, this patch will be much 
> effective. The unites above simulates a real case that there is a 
> 43395 lines of Autogen.c generated.

I beg to differ. Shaving 2-3 seconds off the autogen phase is a substantial improvement.

However, on my wimpy 24-core Cortex-A53 system, the effect is not noticeable (fluctuates between 1:56-1:58 whether with or without this patch).

And even on my x86 workstation, I see no measurable difference (12-13s).
What is the hardware you are benchmarking on?

It does not appear to be detrimental to performance on any of my platforms, but I would like to see some more measurements, and I would like to see that logged with some more detail in bugzilla.

Regards,

Leif

> Since this patch mostly effect the Parser/AutoGen phase, I just use 
> "build genmake" to show the improvement data.
> The final result for clean build is:
> Current code:  17 seconds
> After patch:      15 seconds
> 
> Details:
> Current data:
> 
> d:\edk2 (master -> origin)
> λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -t 
> VS2015x86 Build environment: Windows-10-10.0.10240 Build start time: 
> 10:12:32, Nov.09 2018
> 
> WORKSPACE        = d:\edk2
> ECP_SOURCE       = d:\edk2\edkcompatibilitypkg
> EDK_SOURCE       = d:\edk2\edkcompatibilitypkg
> EFI_SOURCE       = d:\edk2\edkcompatibilitypkg
> EDK_TOOLS_PATH   = d:\edk2\basetools
> EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32
> CONF_PATH        = d:\edk2\conf
> 
> Architecture(s)  = IA32 X64
> Build target     = DEBUG
> Toolchain        = VS2015x86
> 
> Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc
> Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf
> 
> Processing meta-data ....... done!
> Generating code . done!
> Generating makefile . done!
> Generating code .. done!
> Generating makefile ...... done!
> 
> - Done -
> Build end time: 10:12:49, Nov.09 2018
> Build total time: 00:00:17
> 
> After applying this patch:
> 
> d:\edk2 (master -> origin)                                    
> λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -
> Build environment: Windows-10-10.0.10240                      
> Build start time: 10:11:41, Nov.09 2018                       
>                                                               
> WORKSPACE        = d:\edk2                                    
> ECP_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EDK_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EFI_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EDK_TOOLS_PATH   = d:\edk2\basetools                          
> EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32                
> CONF_PATH        = d:\edk2\conf                               
>                                                               
>                                                               
> Architecture(s)  = IA32 X64                                   
> Build target     = DEBUG                                      
> Toolchain        = VS2015x86                                  
>                                                               
> Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc 
> Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf 
>                                                               
> Processing meta-data ..... done!                              
> Generating code . done!                                       
> Generating makefile . done!                                   
> Generating code .. done!                                      
> Generating makefile ...... done!                              
>                                                               
> - Done -                                                      
> Build end time: 10:11:56, Nov.09 2018                         
> Build total time: 00:00:15                                    
> 
> 
> Thanks,
> Bob
> 
> 
> -----Original Message-----
> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> Sent: Friday, November 9, 2018 12:53 AM
> To: Feng, Bob C <bob.c.feng@intel.com>
> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; 
> Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> > 
> > This patch is one of build tool performance improvement series 
> > patches.
> > 
> > This patch is going to use join function instead of string += 
> > string2 statement.
> > 
> > Current code use string += string2 in a loop to combine a string. 
> > while creating a string list in a loop and using
> > "".join(stringlist) after the loop will be much faster.
> 
> Do you have any numbers on the level of improvement seen?
> 
> Either for the individual scripts when called identically, or (if
> measurable) on the build of an entire platform (say OvmfX64?).
> 
> Regards,
> 
> Leif
> 
> > Contributed-under: TianoCore Contribution Agreement 1.1
> > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > Cc: Liming Gao <liming.gao@intel.com>
> > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > ---
> >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
> >  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
> >  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
> >  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
> >  4 files changed, 44 insertions(+), 31 deletions(-)
> > 
> > diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py
> > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > index 361d499076..d34a9e9447 100644
> > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> >  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
> >  #
> >  # @retval Str:           A string of .h file content
> >  #
> >  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > -    Str = ''
> > +    Str = []
> >      ValueStartPtr = 60
> >      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> >      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> > @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  else:
> >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  UnusedStr = WriteLine(UnusedStr, Line)
> >  
> > -    Str = ''.join([Str, UnusedStr])
> > +    Str.extend( UnusedStr)
> >  
> >      Str = WriteLine(Str, '')
> >      if IsCompatibleMode or UniGenCFlag:
> >          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> > -    return Str
> > +    return "".join(Str)
> >  
> >  ## Create a complete .h file
> >  #
> >  # Create a complet .h file with file header and file content  # @@
> > -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >  # @retval Str:           A string of complete .h file
> >  #
> >  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >      HFile = WriteLine('', CreateHFileContent(BaseName, 
> > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> >  
> > -    return HFile
> > +    return "".join(HFile)
> >  
> >  ## Create a buffer to store all items in an array  #
> >  # @param BinBuffer   Buffer to contain Binary data.
> >  # @param Array:      The array need to be formatted
> > @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
> >  #
> >  def CreateArrayItem(Array, Width = 16):
> >      MaxLength = Width
> >      Index = 0
> >      Line = '  '
> > -    ArrayItem = ''
> > +    ArrayItem = []
> >  
> >      for Item in Array:
> >          if Index < MaxLength:
> >              Line = Line + Item + ',  '
> >              Index = Index + 1
> > @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
> >              ArrayItem = WriteLine(ArrayItem, Line)
> >              Line = '  ' + Item + ',  '
> >              Index = 1
> >      ArrayItem = Write(ArrayItem, Line.rstrip())
> >  
> > -    return ArrayItem
> > +    return "".join(ArrayItem)
> >  
> >  ## CreateCFileStringValue
> >  #
> >  # Create a line with string value
> >  #
> > @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
> >  
> >  def CreateCFileStringValue(Value):
> >      Value = [StringBlockType] + Value
> >      Str = WriteLine('', CreateArrayItem(Value))
> >  
> > -    return Str
> > +    return "".join(Str)
> >  
> >  ## GetFilteredLanguage
> >  #
> >  # apply get best language rules to the UNI language code list  # @@
> > -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
> >      #
> >      # Join package data
> >      #
> >      AllStr = Write(AllStr, Str)
> >  
> > -    return AllStr
> > +    return "".join(AllStr)
> >  
> >  ## Create end of .c file
> >  #
> >  # Create end of .c file
> >  #
> > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> >  #
> >  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
> >      CFile = ''
> >      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
> >      CFile = WriteLine(CFile, CreateCFileEnd())
> > -    return CFile
> > +    return "".join(CFile)
> >  
> >  ## GetFileList
> >  #
> >  # Get a list for all files
> >  #
> > @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, 
> > IncludeList, IncludePathList, Ski
> >  
> >  #
> >  # Write an item
> >  #
> >  def Write(Target, Item):
> > -    return ''.join([Target, Item])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item,list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    return Target
> >  
> >  #
> >  # Write an item with a break line
> >  #
> >  def WriteLine(Target, Item):
> > -    return ''.join([Target, Item, '\n'])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item, list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    Target.append('\n')
> > +    return Target
> >  
> >  # This acts like the main() function for the script, unless it is 
> > 'import'ed into another  # script.
> >  if __name__ == '__main__':
> >      EdkLogger.info('start')
> > diff --git a/BaseTools/Source/Python/Common/Misc.py
> > b/BaseTools/Source/Python/Common/Misc.py
> > index 80236db160..8dcbe141ae 100644
> > --- a/BaseTools/Source/Python/Common/Misc.py
> > +++ b/BaseTools/Source/Python/Common/Misc.py
> > @@ -777,21 +777,21 @@ class TemplateString(object):
> >  
> >              return "".join(StringList)
> >  
> >      ## Constructor
> >      def __init__(self, Template=None):
> > -        self.String = ''
> > +        self.String = []
> >          self.IsBinary = False
> >          self._Template = Template
> >          self._TemplateSectionList = self._Parse(Template)
> >  
> >      ## str() operator
> >      #
> >      #   @retval     string  The string replaced
> >      #
> >      def __str__(self):
> > -        return self.String
> > +        return "".join(self.String)
> >  
> >      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
> >      #
> >      #   @retval     list    A list of TemplateString.Section objects
> >      #
> > @@ -835,13 +835,16 @@ class TemplateString(object):
> >      #   @param      Dictionary      The placeholder dictionaries
> >      #
> >      def Append(self, AppendString, Dictionary=None):
> >          if Dictionary:
> >              SectionList = self._Parse(AppendString)
> > -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> > +            self.String.append( "".join(S.Instantiate(Dictionary) 
> > + for S in SectionList))
> >          else:
> > -            self.String += AppendString
> > +            if isinstance(AppendString,list):
> > +                self.String.extend(AppendString)
> > +            else:
> > +                self.String.append(AppendString)
> >  
> >      ## Replace the string template with dictionary of placeholders
> >      #
> >      #   @param      Dictionary      The placeholder dictionaries
> >      #
> > @@ -1741,27 +1744,21 @@ class PathClass(object):
> >      #
> >      # @retval False The two PathClass are different
> >      # @retval True  The two PathClass are the same
> >      #
> >      def __eq__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            return self.Path == Other.Path
> > -        else:
> > -            return self.Path == str(Other)
> > +        return self.Path == str(Other)
> >  
> >      ## Override __cmp__ function
> >      #
> >      # Customize the comparsion operation of two PathClass
> >      #
> >      # @retval 0     The two PathClass are different
> >      # @retval -1    The first PathClass is less than the second PathClass
> >      # @retval 1     The first PathClass is Bigger than the second PathClass
> >      def __cmp__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            OtherKey = Other.Path
> > -        else:
> > -            OtherKey = str(Other)
> > +        OtherKey = str(Other)
> >  
> >          SelfKey = self.Path
> >          if SelfKey == OtherKey:
> >              return 0
> >          elif SelfKey > OtherKey:
> > diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > index 44d44d24eb..d615cccdf7 100644
> > --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
> >          for Record in RecordList:
> >              Lib = Record[0]
> >              Instance = Record[1]
> >              if Instance:
> >                  Instance = NormPath(Instance, self._Macros)
> > -            RetVal[Lib] = Instance
> > +                RetVal[Lib] = Instance
> > +            else:
> > +                RetVal[Lib] = None
> >          return RetVal
> >  
> >      ## Retrieve library names (for Edk.x style of modules)
> >      @cached_property
> >      def Libraries(self):
> > diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > index 8d8a3e2789..55d01fa4b2 100644
> > --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
> >      while len(LibraryConsumerList) > 0:
> >          M = LibraryConsumerList.pop()
> >          for LibraryClassName in M.LibraryClasses:
> >              if LibraryClassName not in LibraryInstance:
> >                  # override library instance for this module
> > -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> > -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> > -                else:
> > -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> > -                if LibraryPath is None or LibraryPath == "":
> > -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> > -                    if LibraryPath is None or LibraryPath == "":
> > +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> > +                if LibraryPath is None:
> > +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> > +                    if LibraryPath is None:
> >                          if FileName:
> >                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
> >                                              "Instance of library class [%s] is not found" % LibraryClassName,
> >                                              File=FileName,
> >                                              ExtraData="in [%s] 
> > [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> > --
> > 2.19.1.windows.1
> > 
> > _______________________________________________
> > edk2-devel mailing list
> > edk2-devel@lists.01.org
> > https://lists.01.org/mailman/listinfo/edk2-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-11-11  0:41       ` Feng, Bob C
@ 2018-12-05  2:51         ` Feng, Bob C
  2018-12-10 10:47           ` Leif Lindholm
  0 siblings, 1 reply; 18+ messages in thread
From: Feng, Bob C @ 2018-12-05  2:51 UTC (permalink / raw)
  To: edk2-devel@lists.01.org
  Cc: Leif Lindholm, Carsey, Jaben, Gao, Liming, Feng, Bob C

Hi,

I have added the performance data in BZ https://bugzilla.tianocore.org/show_bug.cgi?id=1288 . Please have a review.

Thanks,
Bob


-----Original Message-----
From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Feng, Bob C
Sent: Sunday, November 11, 2018 8:41 AM
To: Leif Lindholm <leif.lindholm@linaro.org>
Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation

Hi Leif,

I use my desktop to do the benchmark.
CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz
Memory: 16G
OS: Win10

I'll add the performance detailed data to BZ.

-Bob

-----Original Message-----
From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
Sent: Friday, November 9, 2018 7:49 PM
To: Feng, Bob C <bob.c.feng@intel.com>
Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation

Hi Bob,

On Fri, Nov 09, 2018 at 03:25:04AM +0000, Feng, Bob C wrote:
> Yes. I should show the data.
> 
> My unites scripts is as below. The parameter lines is a string list 
> which size is 43395. The test result is
> 
> ''.join(String list) time:  0.042262
> String += String time  :  3.822699
> 
> def TestPlus(lines):
>     str_target = ""
>     
>     for line in lines:
>         str_target += line
>         
>     return str_target
> 
> def TestJoin(lines):
>     str_target = []
>     
>     for line in lines:
>         str_target.append(line)
>         
>     return "".join(str_target)
> 
> def CompareStrCat():
>     lines = GetStrings()
>     print (len(lines))
>     
>     begin = time.perf_counter()
>     for _ in range(10):
>         TestJoin(lines)
>     end = time.perf_counter() - begin
>     print ("''.join(String list) time: %f" % end)
>     
>     begin = time.perf_counter()
>     for _ in range(10):
>         TestPlus(lines)
>     end = time.perf_counter() - begin
>     print ("String += String time: %f" % end)
> 
> For build OvmfX64, it's not very effective, it saves 2~3 second in 
> Parse/AutoGen phase, because OvmfX64 is relatively simple. It does not 
> enable much features such as Multiple SKU and structure PCD by default 
> and there is no big size Autogen.c/Autogen.h/Makefile generated 
> either. but for the complex platform, this patch will be much 
> effective. The unites above simulates a real case that there is a
> 43395 lines of Autogen.c generated.

I beg to differ. Shaving 2-3 seconds off the autogen phase is a substantial improvement.

However, on my wimpy 24-core Cortex-A53 system, the effect is not noticeable (fluctuates between 1:56-1:58 whether with or without this patch).

And even on my x86 workstation, I see no measurable difference (12-13s).
What is the hardware you are benchmarking on?

It does not appear to be detrimental to performance on any of my platforms, but I would like to see some more measurements, and I would like to see that logged with some more detail in bugzilla.

Regards,

Leif

> Since this patch mostly effect the Parser/AutoGen phase, I just use 
> "build genmake" to show the improvement data.
> The final result for clean build is:
> Current code:  17 seconds
> After patch:      15 seconds
> 
> Details:
> Current data:
> 
> d:\edk2 (master -> origin)
> λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -t
> VS2015x86 Build environment: Windows-10-10.0.10240 Build start time: 
> 10:12:32, Nov.09 2018
> 
> WORKSPACE        = d:\edk2
> ECP_SOURCE       = d:\edk2\edkcompatibilitypkg
> EDK_SOURCE       = d:\edk2\edkcompatibilitypkg
> EFI_SOURCE       = d:\edk2\edkcompatibilitypkg
> EDK_TOOLS_PATH   = d:\edk2\basetools
> EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32
> CONF_PATH        = d:\edk2\conf
> 
> Architecture(s)  = IA32 X64
> Build target     = DEBUG
> Toolchain        = VS2015x86
> 
> Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc
> Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf
> 
> Processing meta-data ....... done!
> Generating code . done!
> Generating makefile . done!
> Generating code .. done!
> Generating makefile ...... done!
> 
> - Done -
> Build end time: 10:12:49, Nov.09 2018
> Build total time: 00:00:17
> 
> After applying this patch:
> 
> d:\edk2 (master -> origin)                                    
> λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -
> Build environment: Windows-10-10.0.10240                      
> Build start time: 10:11:41, Nov.09 2018                       
>                                                               
> WORKSPACE        = d:\edk2                                    
> ECP_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EDK_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EFI_SOURCE       = d:\edk2\edkcompatibilitypkg                
> EDK_TOOLS_PATH   = d:\edk2\basetools                          
> EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32                
> CONF_PATH        = d:\edk2\conf                               
>                                                               
>                                                               
> Architecture(s)  = IA32 X64                                   
> Build target     = DEBUG                                      
> Toolchain        = VS2015x86                                  
>                                                               
> Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc 
> Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf 
>                                                               
> Processing meta-data ..... done!                              
> Generating code . done!                                       
> Generating makefile . done!                                   
> Generating code .. done!                                      
> Generating makefile ...... done!                              
>                                                               
> - Done -                                                      
> Build end time: 10:11:56, Nov.09 2018                         
> Build total time: 00:00:15                                    
> 
> 
> Thanks,
> Bob
> 
> 
> -----Original Message-----
> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> Sent: Friday, November 9, 2018 12:53 AM
> To: Feng, Bob C <bob.c.feng@intel.com>
> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; 
> Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> > 
> > This patch is one of build tool performance improvement series 
> > patches.
> > 
> > This patch is going to use join function instead of string +=
> > string2 statement.
> > 
> > Current code use string += string2 in a loop to combine a string. 
> > while creating a string list in a loop and using
> > "".join(stringlist) after the loop will be much faster.
> 
> Do you have any numbers on the level of improvement seen?
> 
> Either for the individual scripts when called identically, or (if
> measurable) on the build of an entire platform (say OvmfX64?).
> 
> Regards,
> 
> Leif
> 
> > Contributed-under: TianoCore Contribution Agreement 1.1
> > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > Cc: Liming Gao <liming.gao@intel.com>
> > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > ---
> >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
> >  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
> >  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
> >  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
> >  4 files changed, 44 insertions(+), 31 deletions(-)
> > 
> > diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py
> > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > index 361d499076..d34a9e9447 100644
> > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> >  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
> >  #
> >  # @retval Str:           A string of .h file content
> >  #
> >  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > -    Str = ''
> > +    Str = []
> >      ValueStartPtr = 60
> >      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> >      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
> >      Str = WriteLine(Str, Line)
> > @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  else:
> >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> >                  UnusedStr = WriteLine(UnusedStr, Line)
> >  
> > -    Str = ''.join([Str, UnusedStr])
> > +    Str.extend( UnusedStr)
> >  
> >      Str = WriteLine(Str, '')
> >      if IsCompatibleMode or UniGenCFlag:
> >          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> > -    return Str
> > +    return "".join(Str)
> >  
> >  ## Create a complete .h file
> >  #
> >  # Create a complet .h file with file header and file content  # @@
> > -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >  # @retval Str:           A string of complete .h file
> >  #
> >  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> >      HFile = WriteLine('', CreateHFileContent(BaseName, 
> > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> >  
> > -    return HFile
> > +    return "".join(HFile)
> >  
> >  ## Create a buffer to store all items in an array  #
> >  # @param BinBuffer   Buffer to contain Binary data.
> >  # @param Array:      The array need to be formatted
> > @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
> >  #
> >  def CreateArrayItem(Array, Width = 16):
> >      MaxLength = Width
> >      Index = 0
> >      Line = '  '
> > -    ArrayItem = ''
> > +    ArrayItem = []
> >  
> >      for Item in Array:
> >          if Index < MaxLength:
> >              Line = Line + Item + ',  '
> >              Index = Index + 1
> > @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
> >              ArrayItem = WriteLine(ArrayItem, Line)
> >              Line = '  ' + Item + ',  '
> >              Index = 1
> >      ArrayItem = Write(ArrayItem, Line.rstrip())
> >  
> > -    return ArrayItem
> > +    return "".join(ArrayItem)
> >  
> >  ## CreateCFileStringValue
> >  #
> >  # Create a line with string value
> >  #
> > @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
> >  
> >  def CreateCFileStringValue(Value):
> >      Value = [StringBlockType] + Value
> >      Str = WriteLine('', CreateArrayItem(Value))
> >  
> > -    return Str
> > +    return "".join(Str)
> >  
> >  ## GetFilteredLanguage
> >  #
> >  # apply get best language rules to the UNI language code list  # @@
> > -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
> >      #
> >      # Join package data
> >      #
> >      AllStr = Write(AllStr, Str)
> >  
> > -    return AllStr
> > +    return "".join(AllStr)
> >  
> >  ## Create end of .c file
> >  #
> >  # Create end of .c file
> >  #
> > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> >  #
> >  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
> >      CFile = ''
> >      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
> >      CFile = WriteLine(CFile, CreateCFileEnd())
> > -    return CFile
> > +    return "".join(CFile)
> >  
> >  ## GetFileList
> >  #
> >  # Get a list for all files
> >  #
> > @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, 
> > IncludeList, IncludePathList, Ski
> >  
> >  #
> >  # Write an item
> >  #
> >  def Write(Target, Item):
> > -    return ''.join([Target, Item])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item,list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    return Target
> >  
> >  #
> >  # Write an item with a break line
> >  #
> >  def WriteLine(Target, Item):
> > -    return ''.join([Target, Item, '\n'])
> > +    if isinstance(Target,str):
> > +        Target = [Target]
> > +    if not Target:
> > +        Target = []
> > +    if isinstance(Item, list):
> > +        Target.extend(Item)
> > +    else:
> > +        Target.append(Item)
> > +    Target.append('\n')
> > +    return Target
> >  
> >  # This acts like the main() function for the script, unless it is 
> > 'import'ed into another  # script.
> >  if __name__ == '__main__':
> >      EdkLogger.info('start')
> > diff --git a/BaseTools/Source/Python/Common/Misc.py
> > b/BaseTools/Source/Python/Common/Misc.py
> > index 80236db160..8dcbe141ae 100644
> > --- a/BaseTools/Source/Python/Common/Misc.py
> > +++ b/BaseTools/Source/Python/Common/Misc.py
> > @@ -777,21 +777,21 @@ class TemplateString(object):
> >  
> >              return "".join(StringList)
> >  
> >      ## Constructor
> >      def __init__(self, Template=None):
> > -        self.String = ''
> > +        self.String = []
> >          self.IsBinary = False
> >          self._Template = Template
> >          self._TemplateSectionList = self._Parse(Template)
> >  
> >      ## str() operator
> >      #
> >      #   @retval     string  The string replaced
> >      #
> >      def __str__(self):
> > -        return self.String
> > +        return "".join(self.String)
> >  
> >      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
> >      #
> >      #   @retval     list    A list of TemplateString.Section objects
> >      #
> > @@ -835,13 +835,16 @@ class TemplateString(object):
> >      #   @param      Dictionary      The placeholder dictionaries
> >      #
> >      def Append(self, AppendString, Dictionary=None):
> >          if Dictionary:
> >              SectionList = self._Parse(AppendString)
> > -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> > +            self.String.append( "".join(S.Instantiate(Dictionary) 
> > + for S in SectionList))
> >          else:
> > -            self.String += AppendString
> > +            if isinstance(AppendString,list):
> > +                self.String.extend(AppendString)
> > +            else:
> > +                self.String.append(AppendString)
> >  
> >      ## Replace the string template with dictionary of placeholders
> >      #
> >      #   @param      Dictionary      The placeholder dictionaries
> >      #
> > @@ -1741,27 +1744,21 @@ class PathClass(object):
> >      #
> >      # @retval False The two PathClass are different
> >      # @retval True  The two PathClass are the same
> >      #
> >      def __eq__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            return self.Path == Other.Path
> > -        else:
> > -            return self.Path == str(Other)
> > +        return self.Path == str(Other)
> >  
> >      ## Override __cmp__ function
> >      #
> >      # Customize the comparsion operation of two PathClass
> >      #
> >      # @retval 0     The two PathClass are different
> >      # @retval -1    The first PathClass is less than the second PathClass
> >      # @retval 1     The first PathClass is Bigger than the second PathClass
> >      def __cmp__(self, Other):
> > -        if isinstance(Other, type(self)):
> > -            OtherKey = Other.Path
> > -        else:
> > -            OtherKey = str(Other)
> > +        OtherKey = str(Other)
> >  
> >          SelfKey = self.Path
> >          if SelfKey == OtherKey:
> >              return 0
> >          elif SelfKey > OtherKey:
> > diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > index 44d44d24eb..d615cccdf7 100644
> > --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
> >          for Record in RecordList:
> >              Lib = Record[0]
> >              Instance = Record[1]
> >              if Instance:
> >                  Instance = NormPath(Instance, self._Macros)
> > -            RetVal[Lib] = Instance
> > +                RetVal[Lib] = Instance
> > +            else:
> > +                RetVal[Lib] = None
> >          return RetVal
> >  
> >      ## Retrieve library names (for Edk.x style of modules)
> >      @cached_property
> >      def Libraries(self):
> > diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > index 8d8a3e2789..55d01fa4b2 100644
> > --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
> >      while len(LibraryConsumerList) > 0:
> >          M = LibraryConsumerList.pop()
> >          for LibraryClassName in M.LibraryClasses:
> >              if LibraryClassName not in LibraryInstance:
> >                  # override library instance for this module
> > -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> > -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> > -                else:
> > -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> > -                if LibraryPath is None or LibraryPath == "":
> > -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> > -                    if LibraryPath is None or LibraryPath == "":
> > +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> > +                if LibraryPath is None:
> > +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> > +                    if LibraryPath is None:
> >                          if FileName:
> >                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
> >                                              "Instance of library class [%s] is not found" % LibraryClassName,
> >                                              File=FileName,
> >                                              ExtraData="in [%s] 
> > [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> > --
> > 2.19.1.windows.1
> > 
> > _______________________________________________
> > edk2-devel mailing list
> > edk2-devel@lists.01.org
> > https://lists.01.org/mailman/listinfo/edk2-devel
_______________________________________________
edk2-devel mailing list
edk2-devel@lists.01.org
https://lists.01.org/mailman/listinfo/edk2-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-05  2:51         ` Feng, Bob C
@ 2018-12-10 10:47           ` Leif Lindholm
  2018-12-10 12:09             ` Feng, Bob C
  0 siblings, 1 reply; 18+ messages in thread
From: Leif Lindholm @ 2018-12-10 10:47 UTC (permalink / raw)
  To: Feng, Bob C; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

Hi Bob,

Thanks.

I am a little bit confused by the "customized deepcopy" and "cache for
uni file parser" data. These look like they are slowing down rather
than speeding up the build.

Regards,

Leif

On Wed, Dec 05, 2018 at 02:51:58AM +0000, Feng, Bob C wrote:
> Hi,
> 
> I have added the performance data in BZ
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288 . Please have a
> review.
> 
> Thanks,
> Bob
> 
> 
> -----Original Message-----
> From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Feng, Bob C
> Sent: Sunday, November 11, 2018 8:41 AM
> To: Leif Lindholm <leif.lindholm@linaro.org>
> Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> Hi Leif,
> 
> I use my desktop to do the benchmark.
> CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz
> Memory: 16G
> OS: Win10
> 
> I'll add the performance detailed data to BZ.
> 
> -Bob
> 
> -----Original Message-----
> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> Sent: Friday, November 9, 2018 7:49 PM
> To: Feng, Bob C <bob.c.feng@intel.com>
> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> Hi Bob,
> 
> On Fri, Nov 09, 2018 at 03:25:04AM +0000, Feng, Bob C wrote:
> > Yes. I should show the data.
> > 
> > My unites scripts is as below. The parameter lines is a string list 
> > which size is 43395. The test result is
> > 
> > ''.join(String list) time:  0.042262
> > String += String time  :  3.822699
> > 
> > def TestPlus(lines):
> >     str_target = ""
> >     
> >     for line in lines:
> >         str_target += line
> >         
> >     return str_target
> > 
> > def TestJoin(lines):
> >     str_target = []
> >     
> >     for line in lines:
> >         str_target.append(line)
> >         
> >     return "".join(str_target)
> > 
> > def CompareStrCat():
> >     lines = GetStrings()
> >     print (len(lines))
> >     
> >     begin = time.perf_counter()
> >     for _ in range(10):
> >         TestJoin(lines)
> >     end = time.perf_counter() - begin
> >     print ("''.join(String list) time: %f" % end)
> >     
> >     begin = time.perf_counter()
> >     for _ in range(10):
> >         TestPlus(lines)
> >     end = time.perf_counter() - begin
> >     print ("String += String time: %f" % end)
> > 
> > For build OvmfX64, it's not very effective, it saves 2~3 second in 
> > Parse/AutoGen phase, because OvmfX64 is relatively simple. It does not 
> > enable much features such as Multiple SKU and structure PCD by default 
> > and there is no big size Autogen.c/Autogen.h/Makefile generated 
> > either. but for the complex platform, this patch will be much 
> > effective. The unites above simulates a real case that there is a
> > 43395 lines of Autogen.c generated.
> 
> I beg to differ. Shaving 2-3 seconds off the autogen phase is a substantial improvement.
> 
> However, on my wimpy 24-core Cortex-A53 system, the effect is not noticeable (fluctuates between 1:56-1:58 whether with or without this patch).
> 
> And even on my x86 workstation, I see no measurable difference (12-13s).
> What is the hardware you are benchmarking on?
> 
> It does not appear to be detrimental to performance on any of my platforms, but I would like to see some more measurements, and I would like to see that logged with some more detail in bugzilla.
> 
> Regards,
> 
> Leif
> 
> > Since this patch mostly effect the Parser/AutoGen phase, I just use 
> > "build genmake" to show the improvement data.
> > The final result for clean build is:
> > Current code:  17 seconds
> > After patch:      15 seconds
> > 
> > Details:
> > Current data:
> > 
> > d:\edk2 (master -> origin)
> > λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -t
> > VS2015x86 Build environment: Windows-10-10.0.10240 Build start time: 
> > 10:12:32, Nov.09 2018
> > 
> > WORKSPACE        = d:\edk2
> > ECP_SOURCE       = d:\edk2\edkcompatibilitypkg
> > EDK_SOURCE       = d:\edk2\edkcompatibilitypkg
> > EFI_SOURCE       = d:\edk2\edkcompatibilitypkg
> > EDK_TOOLS_PATH   = d:\edk2\basetools
> > EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32
> > CONF_PATH        = d:\edk2\conf
> > 
> > Architecture(s)  = IA32 X64
> > Build target     = DEBUG
> > Toolchain        = VS2015x86
> > 
> > Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc
> > Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf
> > 
> > Processing meta-data ....... done!
> > Generating code . done!
> > Generating makefile . done!
> > Generating code .. done!
> > Generating makefile ...... done!
> > 
> > - Done -
> > Build end time: 10:12:49, Nov.09 2018
> > Build total time: 00:00:17
> > 
> > After applying this patch:
> > 
> > d:\edk2 (master -> origin)                                    
> > λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -
> > Build environment: Windows-10-10.0.10240                      
> > Build start time: 10:11:41, Nov.09 2018                       
> >                                                               
> > WORKSPACE        = d:\edk2                                    
> > ECP_SOURCE       = d:\edk2\edkcompatibilitypkg                
> > EDK_SOURCE       = d:\edk2\edkcompatibilitypkg                
> > EFI_SOURCE       = d:\edk2\edkcompatibilitypkg                
> > EDK_TOOLS_PATH   = d:\edk2\basetools                          
> > EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32                
> > CONF_PATH        = d:\edk2\conf                               
> >                                                               
> >                                                               
> > Architecture(s)  = IA32 X64                                   
> > Build target     = DEBUG                                      
> > Toolchain        = VS2015x86                                  
> >                                                               
> > Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc 
> > Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf 
> >                                                               
> > Processing meta-data ..... done!                              
> > Generating code . done!                                       
> > Generating makefile . done!                                   
> > Generating code .. done!                                      
> > Generating makefile ...... done!                              
> >                                                               
> > - Done -                                                      
> > Build end time: 10:11:56, Nov.09 2018                         
> > Build total time: 00:00:15                                    
> > 
> > 
> > Thanks,
> > Bob
> > 
> > 
> > -----Original Message-----
> > From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> > Sent: Friday, November 9, 2018 12:53 AM
> > To: Feng, Bob C <bob.c.feng@intel.com>
> > Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; 
> > Gao, Liming <liming.gao@intel.com>
> > Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> > 
> > On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> > > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> > > 
> > > This patch is one of build tool performance improvement series 
> > > patches.
> > > 
> > > This patch is going to use join function instead of string +=
> > > string2 statement.
> > > 
> > > Current code use string += string2 in a loop to combine a string. 
> > > while creating a string list in a loop and using
> > > "".join(stringlist) after the loop will be much faster.
> > 
> > Do you have any numbers on the level of improvement seen?
> > 
> > Either for the individual scripts when called identically, or (if
> > measurable) on the build of an entire platform (say OvmfX64?).
> > 
> > Regards,
> > 
> > Leif
> > 
> > > Contributed-under: TianoCore Contribution Agreement 1.1
> > > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > > Cc: Liming Gao <liming.gao@intel.com>
> > > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > > ---
> > >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
> > >  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
> > >  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
> > >  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
> > >  4 files changed, 44 insertions(+), 31 deletions(-)
> > > 
> > > diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py
> > > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > > index 361d499076..d34a9e9447 100644
> > > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> > >  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
> > >  #
> > >  # @retval Str:           A string of .h file content
> > >  #
> > >  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > > -    Str = ''
> > > +    Str = []
> > >      ValueStartPtr = 60
> > >      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> > >      Str = WriteLine(Str, Line)
> > >      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
> > >      Str = WriteLine(Str, Line)
> > > @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> > >                  else:
> > >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> > >                  UnusedStr = WriteLine(UnusedStr, Line)
> > >  
> > > -    Str = ''.join([Str, UnusedStr])
> > > +    Str.extend( UnusedStr)
> > >  
> > >      Str = WriteLine(Str, '')
> > >      if IsCompatibleMode or UniGenCFlag:
> > >          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> > > -    return Str
> > > +    return "".join(Str)
> > >  
> > >  ## Create a complete .h file
> > >  #
> > >  # Create a complet .h file with file header and file content  # @@
> > > -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >  # @retval Str:           A string of complete .h file
> > >  #
> > >  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >      HFile = WriteLine('', CreateHFileContent(BaseName, 
> > > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> > >  
> > > -    return HFile
> > > +    return "".join(HFile)
> > >  
> > >  ## Create a buffer to store all items in an array  #
> > >  # @param BinBuffer   Buffer to contain Binary data.
> > >  # @param Array:      The array need to be formatted
> > > @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
> > >  #
> > >  def CreateArrayItem(Array, Width = 16):
> > >      MaxLength = Width
> > >      Index = 0
> > >      Line = '  '
> > > -    ArrayItem = ''
> > > +    ArrayItem = []
> > >  
> > >      for Item in Array:
> > >          if Index < MaxLength:
> > >              Line = Line + Item + ',  '
> > >              Index = Index + 1
> > > @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
> > >              ArrayItem = WriteLine(ArrayItem, Line)
> > >              Line = '  ' + Item + ',  '
> > >              Index = 1
> > >      ArrayItem = Write(ArrayItem, Line.rstrip())
> > >  
> > > -    return ArrayItem
> > > +    return "".join(ArrayItem)
> > >  
> > >  ## CreateCFileStringValue
> > >  #
> > >  # Create a line with string value
> > >  #
> > > @@ -236,11 +236,11 @@ def CreateArrayItem(Array, Width = 16):
> > >  
> > >  def CreateCFileStringValue(Value):
> > >      Value = [StringBlockType] + Value
> > >      Str = WriteLine('', CreateArrayItem(Value))
> > >  
> > > -    return Str
> > > +    return "".join(Str)
> > >  
> > >  ## GetFilteredLanguage
> > >  #
> > >  # apply get best language rules to the UNI language code list  # @@
> > > -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
> > >      #
> > >      # Join package data
> > >      #
> > >      AllStr = Write(AllStr, Str)
> > >  
> > > -    return AllStr
> > > +    return "".join(AllStr)
> > >  
> > >  ## Create end of .c file
> > >  #
> > >  # Create end of .c file
> > >  #
> > > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> > >  #
> > >  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
> > >      CFile = ''
> > >      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
> > >      CFile = WriteLine(CFile, CreateCFileEnd())
> > > -    return CFile
> > > +    return "".join(CFile)
> > >  
> > >  ## GetFileList
> > >  #
> > >  # Get a list for all files
> > >  #
> > > @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, SourceFileList, 
> > > IncludeList, IncludePathList, Ski
> > >  
> > >  #
> > >  # Write an item
> > >  #
> > >  def Write(Target, Item):
> > > -    return ''.join([Target, Item])
> > > +    if isinstance(Target,str):
> > > +        Target = [Target]
> > > +    if not Target:
> > > +        Target = []
> > > +    if isinstance(Item,list):
> > > +        Target.extend(Item)
> > > +    else:
> > > +        Target.append(Item)
> > > +    return Target
> > >  
> > >  #
> > >  # Write an item with a break line
> > >  #
> > >  def WriteLine(Target, Item):
> > > -    return ''.join([Target, Item, '\n'])
> > > +    if isinstance(Target,str):
> > > +        Target = [Target]
> > > +    if not Target:
> > > +        Target = []
> > > +    if isinstance(Item, list):
> > > +        Target.extend(Item)
> > > +    else:
> > > +        Target.append(Item)
> > > +    Target.append('\n')
> > > +    return Target
> > >  
> > >  # This acts like the main() function for the script, unless it is 
> > > 'import'ed into another  # script.
> > >  if __name__ == '__main__':
> > >      EdkLogger.info('start')
> > > diff --git a/BaseTools/Source/Python/Common/Misc.py
> > > b/BaseTools/Source/Python/Common/Misc.py
> > > index 80236db160..8dcbe141ae 100644
> > > --- a/BaseTools/Source/Python/Common/Misc.py
> > > +++ b/BaseTools/Source/Python/Common/Misc.py
> > > @@ -777,21 +777,21 @@ class TemplateString(object):
> > >  
> > >              return "".join(StringList)
> > >  
> > >      ## Constructor
> > >      def __init__(self, Template=None):
> > > -        self.String = ''
> > > +        self.String = []
> > >          self.IsBinary = False
> > >          self._Template = Template
> > >          self._TemplateSectionList = self._Parse(Template)
> > >  
> > >      ## str() operator
> > >      #
> > >      #   @retval     string  The string replaced
> > >      #
> > >      def __str__(self):
> > > -        return self.String
> > > +        return "".join(self.String)
> > >  
> > >      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
> > >      #
> > >      #   @retval     list    A list of TemplateString.Section objects
> > >      #
> > > @@ -835,13 +835,16 @@ class TemplateString(object):
> > >      #   @param      Dictionary      The placeholder dictionaries
> > >      #
> > >      def Append(self, AppendString, Dictionary=None):
> > >          if Dictionary:
> > >              SectionList = self._Parse(AppendString)
> > > -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> > > +            self.String.append( "".join(S.Instantiate(Dictionary) 
> > > + for S in SectionList))
> > >          else:
> > > -            self.String += AppendString
> > > +            if isinstance(AppendString,list):
> > > +                self.String.extend(AppendString)
> > > +            else:
> > > +                self.String.append(AppendString)
> > >  
> > >      ## Replace the string template with dictionary of placeholders
> > >      #
> > >      #   @param      Dictionary      The placeholder dictionaries
> > >      #
> > > @@ -1741,27 +1744,21 @@ class PathClass(object):
> > >      #
> > >      # @retval False The two PathClass are different
> > >      # @retval True  The two PathClass are the same
> > >      #
> > >      def __eq__(self, Other):
> > > -        if isinstance(Other, type(self)):
> > > -            return self.Path == Other.Path
> > > -        else:
> > > -            return self.Path == str(Other)
> > > +        return self.Path == str(Other)
> > >  
> > >      ## Override __cmp__ function
> > >      #
> > >      # Customize the comparsion operation of two PathClass
> > >      #
> > >      # @retval 0     The two PathClass are different
> > >      # @retval -1    The first PathClass is less than the second PathClass
> > >      # @retval 1     The first PathClass is Bigger than the second PathClass
> > >      def __cmp__(self, Other):
> > > -        if isinstance(Other, type(self)):
> > > -            OtherKey = Other.Path
> > > -        else:
> > > -            OtherKey = str(Other)
> > > +        OtherKey = str(Other)
> > >  
> > >          SelfKey = self.Path
> > >          if SelfKey == OtherKey:
> > >              return 0
> > >          elif SelfKey > OtherKey:
> > > diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > index 44d44d24eb..d615cccdf7 100644
> > > --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
> > >          for Record in RecordList:
> > >              Lib = Record[0]
> > >              Instance = Record[1]
> > >              if Instance:
> > >                  Instance = NormPath(Instance, self._Macros)
> > > -            RetVal[Lib] = Instance
> > > +                RetVal[Lib] = Instance
> > > +            else:
> > > +                RetVal[Lib] = None
> > >          return RetVal
> > >  
> > >      ## Retrieve library names (for Edk.x style of modules)
> > >      @cached_property
> > >      def Libraries(self):
> > > diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > index 8d8a3e2789..55d01fa4b2 100644
> > > --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
> > >      while len(LibraryConsumerList) > 0:
> > >          M = LibraryConsumerList.pop()
> > >          for LibraryClassName in M.LibraryClasses:
> > >              if LibraryClassName not in LibraryInstance:
> > >                  # override library instance for this module
> > > -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> > > -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> > > -                else:
> > > -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> > > -                if LibraryPath is None or LibraryPath == "":
> > > -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> > > -                    if LibraryPath is None or LibraryPath == "":
> > > +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> > > +                if LibraryPath is None:
> > > +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> > > +                    if LibraryPath is None:
> > >                          if FileName:
> > >                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
> > >                                              "Instance of library class [%s] is not found" % LibraryClassName,
> > >                                              File=FileName,
> > >                                              ExtraData="in [%s] 
> > > [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> > > --
> > > 2.19.1.windows.1
> > > 
> > > _______________________________________________
> > > edk2-devel mailing list
> > > edk2-devel@lists.01.org
> > > https://lists.01.org/mailman/listinfo/edk2-devel
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-10 10:47           ` Leif Lindholm
@ 2018-12-10 12:09             ` Feng, Bob C
  2018-12-10 12:35               ` Leif Lindholm
  0 siblings, 1 reply; 18+ messages in thread
From: Feng, Bob C @ 2018-12-10 12:09 UTC (permalink / raw)
  To: Leif Lindholm; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

Hi Leif,

For the "customized deepcopy" and "cache for uni file parser" data, you can see the AutoGen is not slower. The whole Build Duration is longer because Make Duration is longer while Make Duration time depends on the external make, compiler and linker. So it's not the patch make the build slow down.

Yes,  it's not faster either. I think that because the Ovmf platform is relatively simple.  From the build tool source code point of view, the customized deepcopy will take effect if the platform enabled multiple SKU or there are many expressions in metadata file to be evaluated. And the "cache for uni file parser" needs there are many uni files.  The Ovmf platform looks not a good platform to demo the effect of this patch.


Thanks,
Bob

-----Original Message-----
From: Leif Lindholm [mailto:leif.lindholm@linaro.org] 
Sent: Monday, December 10, 2018 6:48 PM
To: Feng, Bob C <bob.c.feng@intel.com>
Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation

Hi Bob,

Thanks.

I am a little bit confused by the "customized deepcopy" and "cache for uni file parser" data. These look like they are slowing down rather than speeding up the build.

Regards,

Leif

On Wed, Dec 05, 2018 at 02:51:58AM +0000, Feng, Bob C wrote:
> Hi,
> 
> I have added the performance data in BZ
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288 . Please have a 
> review.
> 
> Thanks,
> Bob
> 
> 
> -----Original Message-----
> From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of 
> Feng, Bob C
> Sent: Sunday, November 11, 2018 8:41 AM
> To: Leif Lindholm <leif.lindholm@linaro.org>
> Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org; 
> Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> Hi Leif,
> 
> I use my desktop to do the benchmark.
> CPU: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz
> Memory: 16G
> OS: Win10
> 
> I'll add the performance detailed data to BZ.
> 
> -Bob
> 
> -----Original Message-----
> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> Sent: Friday, November 9, 2018 7:49 PM
> To: Feng, Bob C <bob.c.feng@intel.com>
> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; 
> Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> Hi Bob,
> 
> On Fri, Nov 09, 2018 at 03:25:04AM +0000, Feng, Bob C wrote:
> > Yes. I should show the data.
> > 
> > My unites scripts is as below. The parameter lines is a string list 
> > which size is 43395. The test result is
> > 
> > ''.join(String list) time:  0.042262 String += String time  :  
> > 3.822699
> > 
> > def TestPlus(lines):
> >     str_target = ""
> >     
> >     for line in lines:
> >         str_target += line
> >         
> >     return str_target
> > 
> > def TestJoin(lines):
> >     str_target = []
> >     
> >     for line in lines:
> >         str_target.append(line)
> >         
> >     return "".join(str_target)
> > 
> > def CompareStrCat():
> >     lines = GetStrings()
> >     print (len(lines))
> >     
> >     begin = time.perf_counter()
> >     for _ in range(10):
> >         TestJoin(lines)
> >     end = time.perf_counter() - begin
> >     print ("''.join(String list) time: %f" % end)
> >     
> >     begin = time.perf_counter()
> >     for _ in range(10):
> >         TestPlus(lines)
> >     end = time.perf_counter() - begin
> >     print ("String += String time: %f" % end)
> > 
> > For build OvmfX64, it's not very effective, it saves 2~3 second in 
> > Parse/AutoGen phase, because OvmfX64 is relatively simple. It does 
> > not enable much features such as Multiple SKU and structure PCD by 
> > default and there is no big size Autogen.c/Autogen.h/Makefile 
> > generated either. but for the complex platform, this patch will be 
> > much effective. The unites above simulates a real case that there is 
> > a
> > 43395 lines of Autogen.c generated.
> 
> I beg to differ. Shaving 2-3 seconds off the autogen phase is a substantial improvement.
> 
> However, on my wimpy 24-core Cortex-A53 system, the effect is not noticeable (fluctuates between 1:56-1:58 whether with or without this patch).
> 
> And even on my x86 workstation, I see no measurable difference (12-13s).
> What is the hardware you are benchmarking on?
> 
> It does not appear to be detrimental to performance on any of my platforms, but I would like to see some more measurements, and I would like to see that logged with some more detail in bugzilla.
> 
> Regards,
> 
> Leif
> 
> > Since this patch mostly effect the Parser/AutoGen phase, I just use 
> > "build genmake" to show the improvement data.
> > The final result for clean build is:
> > Current code:  17 seconds
> > After patch:      15 seconds
> > 
> > Details:
> > Current data:
> > 
> > d:\edk2 (master -> origin)
> > λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -t
> > VS2015x86 Build environment: Windows-10-10.0.10240 Build start time: 
> > 10:12:32, Nov.09 2018
> > 
> > WORKSPACE        = d:\edk2
> > ECP_SOURCE       = d:\edk2\edkcompatibilitypkg
> > EDK_SOURCE       = d:\edk2\edkcompatibilitypkg
> > EFI_SOURCE       = d:\edk2\edkcompatibilitypkg
> > EDK_TOOLS_PATH   = d:\edk2\basetools
> > EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32
> > CONF_PATH        = d:\edk2\conf
> > 
> > Architecture(s)  = IA32 X64
> > Build target     = DEBUG
> > Toolchain        = VS2015x86
> > 
> > Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc
> > Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf
> > 
> > Processing meta-data ....... done!
> > Generating code . done!
> > Generating makefile . done!
> > Generating code .. done!
> > Generating makefile ...... done!
> > 
> > - Done -
> > Build end time: 10:12:49, Nov.09 2018 Build total time: 00:00:17
> > 
> > After applying this patch:
> > 
> > d:\edk2 (master -> origin)                                    
> > λ build genmake -p OvmfPkg\OvmfPkgIa32X64.dsc -a IA32 -a X64 -
> > Build environment: Windows-10-10.0.10240                      
> > Build start time: 10:11:41, Nov.09 2018                       
> >                                                               
> > WORKSPACE        = d:\edk2                                    
> > ECP_SOURCE       = d:\edk2\edkcompatibilitypkg                
> > EDK_SOURCE       = d:\edk2\edkcompatibilitypkg                
> > EFI_SOURCE       = d:\edk2\edkcompatibilitypkg                
> > EDK_TOOLS_PATH   = d:\edk2\basetools                          
> > EDK_TOOLS_BIN    = d:\edk2\basetools\bin\win32                
> > CONF_PATH        = d:\edk2\conf                               
> >                                                               
> >                                                               
> > Architecture(s)  = IA32 X64                                   
> > Build target     = DEBUG                                      
> > Toolchain        = VS2015x86                                  
> >                                                               
> > Active Platform          = d:\edk2\OvmfPkg\OvmfPkgIa32X64.dsc 
> > Flash Image Definition   = d:\edk2\OvmfPkg\OvmfPkgIa32X64.fdf 
> >                                                               
> > Processing meta-data ..... done!                              
> > Generating code . done!                                       
> > Generating makefile . done!                                   
> > Generating code .. done!                                      
> > Generating makefile ...... done!                              
> >                                                               
> > - Done -                                                      
> > Build end time: 10:11:56, Nov.09 2018                         
> > Build total time: 00:00:15                                    
> > 
> > 
> > Thanks,
> > Bob
> > 
> > 
> > -----Original Message-----
> > From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> > Sent: Friday, November 9, 2018 12:53 AM
> > To: Feng, Bob C <bob.c.feng@intel.com>
> > Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; 
> > Gao, Liming <liming.gao@intel.com>
> > Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> > 
> > On Thu, Nov 08, 2018 at 06:16:25PM +0800, BobCF wrote:
> > > https://bugzilla.tianocore.org/show_bug.cgi?id=1288
> > > 
> > > This patch is one of build tool performance improvement series 
> > > patches.
> > > 
> > > This patch is going to use join function instead of string +=
> > > string2 statement.
> > > 
> > > Current code use string += string2 in a loop to combine a string. 
> > > while creating a string list in a loop and using
> > > "".join(stringlist) after the loop will be much faster.
> > 
> > Do you have any numbers on the level of improvement seen?
> > 
> > Either for the individual scripts when called identically, or (if
> > measurable) on the build of an entire platform (say OvmfX64?).
> > 
> > Regards,
> > 
> > Leif
> > 
> > > Contributed-under: TianoCore Contribution Agreement 1.1
> > > Signed-off-by: BobCF <bob.c.feng@intel.com>
> > > Cc: Liming Gao <liming.gao@intel.com>
> > > Cc: Jaben Carsey <jaben.carsey@intel.com>
> > > ---
> > >  BaseTools/Source/Python/AutoGen/StrGather.py  | 39 +++++++++++++------
> > >  BaseTools/Source/Python/Common/Misc.py        | 21 +++++-----
> > >  .../Source/Python/Workspace/InfBuildData.py   |  4 +-
> > >  .../Python/Workspace/WorkspaceCommon.py       | 11 ++----
> > >  4 files changed, 44 insertions(+), 31 deletions(-)
> > > 
> > > diff --git a/BaseTools/Source/Python/AutoGen/StrGather.py
> > > b/BaseTools/Source/Python/AutoGen/StrGather.py
> > > index 361d499076..d34a9e9447 100644
> > > --- a/BaseTools/Source/Python/AutoGen/StrGather.py
> > > +++ b/BaseTools/Source/Python/AutoGen/StrGather.py
> > > @@ -135,11 +135,11 @@ def AscToHexList(Ascii):
> > >  # @param UniGenCFlag      UniString is generated into AutoGen C file when it is set to True
> > >  #
> > >  # @retval Str:           A string of .h file content
> > >  #
> > >  def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > > -    Str = ''
> > > +    Str = []
> > >      ValueStartPtr = 60
> > >      Line = COMMENT_DEFINE_STR + ' ' + LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(0, 4) + COMMENT_NOT_REFERENCED
> > >      Str = WriteLine(Str, Line)
> > >      Line = COMMENT_DEFINE_STR + ' ' + PRINTABLE_LANGUAGE_NAME_STRING_NAME + ' ' * (ValueStartPtr - len(DEFINE_STR + PRINTABLE_LANGUAGE_NAME_STRING_NAME)) + DecToHexStr(1, 4) + COMMENT_NOT_REFERENCED
> > >      Str = WriteLine(Str, Line)
> > > @@ -164,16 +164,16 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> > >                  else:
> > >                      Line = COMMENT_DEFINE_STR + ' ' + Name + ' ' * (ValueStartPtr - len(DEFINE_STR + Name)) + DecToHexStr(Token, 4) + COMMENT_NOT_REFERENCED
> > >                  UnusedStr = WriteLine(UnusedStr, Line)
> > >  
> > > -    Str = ''.join([Str, UnusedStr])
> > > +    Str.extend( UnusedStr)
> > >  
> > >      Str = WriteLine(Str, '')
> > >      if IsCompatibleMode or UniGenCFlag:
> > >          Str = WriteLine(Str, 'extern unsigned char ' + BaseName + 'Strings[];')
> > > -    return Str
> > > +    return "".join(Str)
> > >  
> > >  ## Create a complete .h file
> > >  #
> > >  # Create a complet .h file with file header and file content  # 
> > > @@
> > > -185,11 +185,11 @@ def CreateHFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >  # @retval Str:           A string of complete .h file
> > >  #
> > >  def CreateHFile(BaseName, UniObjectClass, IsCompatibleMode, UniGenCFlag):
> > >      HFile = WriteLine('', CreateHFileContent(BaseName, 
> > > UniObjectClass, IsCompatibleMode, UniGenCFlag))
> > >  
> > > -    return HFile
> > > +    return "".join(HFile)
> > >  
> > >  ## Create a buffer to store all items in an array  #
> > >  # @param BinBuffer   Buffer to contain Binary data.
> > >  # @param Array:      The array need to be formatted
> > > @@ -209,11 +209,11 @@ def CreateBinBuffer(BinBuffer, Array):
> > >  #
> > >  def CreateArrayItem(Array, Width = 16):
> > >      MaxLength = Width
> > >      Index = 0
> > >      Line = '  '
> > > -    ArrayItem = ''
> > > +    ArrayItem = []
> > >  
> > >      for Item in Array:
> > >          if Index < MaxLength:
> > >              Line = Line + Item + ',  '
> > >              Index = Index + 1
> > > @@ -221,11 +221,11 @@ def CreateArrayItem(Array, Width = 16):
> > >              ArrayItem = WriteLine(ArrayItem, Line)
> > >              Line = '  ' + Item + ',  '
> > >              Index = 1
> > >      ArrayItem = Write(ArrayItem, Line.rstrip())
> > >  
> > > -    return ArrayItem
> > > +    return "".join(ArrayItem)
> > >  
> > >  ## CreateCFileStringValue
> > >  #
> > >  # Create a line with string value  # @@ -236,11 +236,11 @@ def 
> > > CreateArrayItem(Array, Width = 16):
> > >  
> > >  def CreateCFileStringValue(Value):
> > >      Value = [StringBlockType] + Value
> > >      Str = WriteLine('', CreateArrayItem(Value))
> > >  
> > > -    return Str
> > > +    return "".join(Str)
> > >  
> > >  ## GetFilteredLanguage
> > >  #
> > >  # apply get best language rules to the UNI language code list  # 
> > > @@
> > > -438,11 +438,11 @@ def CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, UniBinBuffer,
> > >      #
> > >      # Join package data
> > >      #
> > >      AllStr = Write(AllStr, Str)
> > >  
> > > -    return AllStr
> > > +    return "".join(AllStr)
> > >  
> > >  ## Create end of .c file
> > >  #
> > >  # Create end of .c file
> > >  #
> > > @@ -465,11 +465,11 @@ def CreateCFileEnd():
> > >  #
> > >  def CreateCFile(BaseName, UniObjectClass, IsCompatibleMode, FilterInfo):
> > >      CFile = ''
> > >      CFile = WriteLine(CFile, CreateCFileContent(BaseName, UniObjectClass, IsCompatibleMode, None, FilterInfo))
> > >      CFile = WriteLine(CFile, CreateCFileEnd())
> > > -    return CFile
> > > +    return "".join(CFile)
> > >  
> > >  ## GetFileList
> > >  #
> > >  # Get a list for all files
> > >  #
> > > @@ -572,17 +572,34 @@ def GetStringFiles(UniFilList, 
> > > SourceFileList, IncludeList, IncludePathList, Ski
> > >  
> > >  #
> > >  # Write an item
> > >  #
> > >  def Write(Target, Item):
> > > -    return ''.join([Target, Item])
> > > +    if isinstance(Target,str):
> > > +        Target = [Target]
> > > +    if not Target:
> > > +        Target = []
> > > +    if isinstance(Item,list):
> > > +        Target.extend(Item)
> > > +    else:
> > > +        Target.append(Item)
> > > +    return Target
> > >  
> > >  #
> > >  # Write an item with a break line  #  def WriteLine(Target, 
> > > Item):
> > > -    return ''.join([Target, Item, '\n'])
> > > +    if isinstance(Target,str):
> > > +        Target = [Target]
> > > +    if not Target:
> > > +        Target = []
> > > +    if isinstance(Item, list):
> > > +        Target.extend(Item)
> > > +    else:
> > > +        Target.append(Item)
> > > +    Target.append('\n')
> > > +    return Target
> > >  
> > >  # This acts like the main() function for the script, unless it is 
> > > 'import'ed into another  # script.
> > >  if __name__ == '__main__':
> > >      EdkLogger.info('start')
> > > diff --git a/BaseTools/Source/Python/Common/Misc.py
> > > b/BaseTools/Source/Python/Common/Misc.py
> > > index 80236db160..8dcbe141ae 100644
> > > --- a/BaseTools/Source/Python/Common/Misc.py
> > > +++ b/BaseTools/Source/Python/Common/Misc.py
> > > @@ -777,21 +777,21 @@ class TemplateString(object):
> > >  
> > >              return "".join(StringList)
> > >  
> > >      ## Constructor
> > >      def __init__(self, Template=None):
> > > -        self.String = ''
> > > +        self.String = []
> > >          self.IsBinary = False
> > >          self._Template = Template
> > >          self._TemplateSectionList = self._Parse(Template)
> > >  
> > >      ## str() operator
> > >      #
> > >      #   @retval     string  The string replaced
> > >      #
> > >      def __str__(self):
> > > -        return self.String
> > > +        return "".join(self.String)
> > >  
> > >      ## Split the template string into fragments per the ${BEGIN} and ${END} flags
> > >      #
> > >      #   @retval     list    A list of TemplateString.Section objects
> > >      #
> > > @@ -835,13 +835,16 @@ class TemplateString(object):
> > >      #   @param      Dictionary      The placeholder dictionaries
> > >      #
> > >      def Append(self, AppendString, Dictionary=None):
> > >          if Dictionary:
> > >              SectionList = self._Parse(AppendString)
> > > -            self.String += "".join(S.Instantiate(Dictionary) for S in SectionList)
> > > +            self.String.append( "".join(S.Instantiate(Dictionary) 
> > > + for S in SectionList))
> > >          else:
> > > -            self.String += AppendString
> > > +            if isinstance(AppendString,list):
> > > +                self.String.extend(AppendString)
> > > +            else:
> > > +                self.String.append(AppendString)
> > >  
> > >      ## Replace the string template with dictionary of placeholders
> > >      #
> > >      #   @param      Dictionary      The placeholder dictionaries
> > >      #
> > > @@ -1741,27 +1744,21 @@ class PathClass(object):
> > >      #
> > >      # @retval False The two PathClass are different
> > >      # @retval True  The two PathClass are the same
> > >      #
> > >      def __eq__(self, Other):
> > > -        if isinstance(Other, type(self)):
> > > -            return self.Path == Other.Path
> > > -        else:
> > > -            return self.Path == str(Other)
> > > +        return self.Path == str(Other)
> > >  
> > >      ## Override __cmp__ function
> > >      #
> > >      # Customize the comparsion operation of two PathClass
> > >      #
> > >      # @retval 0     The two PathClass are different
> > >      # @retval -1    The first PathClass is less than the second PathClass
> > >      # @retval 1     The first PathClass is Bigger than the second PathClass
> > >      def __cmp__(self, Other):
> > > -        if isinstance(Other, type(self)):
> > > -            OtherKey = Other.Path
> > > -        else:
> > > -            OtherKey = str(Other)
> > > +        OtherKey = str(Other)
> > >  
> > >          SelfKey = self.Path
> > >          if SelfKey == OtherKey:
> > >              return 0
> > >          elif SelfKey > OtherKey:
> > > diff --git a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > index 44d44d24eb..d615cccdf7 100644
> > > --- a/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > +++ b/BaseTools/Source/Python/Workspace/InfBuildData.py
> > > @@ -612,11 +612,13 @@ class InfBuildData(ModuleBuildClassObject):
> > >          for Record in RecordList:
> > >              Lib = Record[0]
> > >              Instance = Record[1]
> > >              if Instance:
> > >                  Instance = NormPath(Instance, self._Macros)
> > > -            RetVal[Lib] = Instance
> > > +                RetVal[Lib] = Instance
> > > +            else:
> > > +                RetVal[Lib] = None
> > >          return RetVal
> > >  
> > >      ## Retrieve library names (for Edk.x style of modules)
> > >      @cached_property
> > >      def Libraries(self):
> > > diff --git a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > index 8d8a3e2789..55d01fa4b2 100644
> > > --- a/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > +++ b/BaseTools/Source/Python/Workspace/WorkspaceCommon.py
> > > @@ -126,17 +126,14 @@ def GetModuleLibInstances(Module, Platform, BuildDatabase, Arch, Target, Toolcha
> > >      while len(LibraryConsumerList) > 0:
> > >          M = LibraryConsumerList.pop()
> > >          for LibraryClassName in M.LibraryClasses:
> > >              if LibraryClassName not in LibraryInstance:
> > >                  # override library instance for this module
> > > -                if LibraryClassName in Platform.Modules[str(Module)].LibraryClasses:
> > > -                    LibraryPath = Platform.Modules[str(Module)].LibraryClasses[LibraryClassName]
> > > -                else:
> > > -                    LibraryPath = Platform.LibraryClasses[LibraryClassName, ModuleType]
> > > -                if LibraryPath is None or LibraryPath == "":
> > > -                    LibraryPath = M.LibraryClasses[LibraryClassName]
> > > -                    if LibraryPath is None or LibraryPath == "":
> > > +                LibraryPath = Platform.Modules[str(Module)].LibraryClasses.get(LibraryClassName,Platform.LibraryClasses[LibraryClassName, ModuleType])
> > > +                if LibraryPath is None:
> > > +                    LibraryPath = M.LibraryClasses.get(LibraryClassName)
> > > +                    if LibraryPath is None:
> > >                          if FileName:
> > >                              EdkLogger.error("build", RESOURCE_NOT_AVAILABLE,
> > >                                              "Instance of library class [%s] is not found" % LibraryClassName,
> > >                                              File=FileName,
> > >                                              ExtraData="in [%s] 
> > > [%s]\n\tconsumed by module [%s]" % (str(M), Arch, str(Module)))
> > > --
> > > 2.19.1.windows.1
> > > 
> > > _______________________________________________
> > > edk2-devel mailing list
> > > edk2-devel@lists.01.org
> > > https://lists.01.org/mailman/listinfo/edk2-devel
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-10 12:09             ` Feng, Bob C
@ 2018-12-10 12:35               ` Leif Lindholm
  2018-12-11  8:48                 ` Feng, Bob C
  0 siblings, 1 reply; 18+ messages in thread
From: Leif Lindholm @ 2018-12-10 12:35 UTC (permalink / raw)
  To: Feng, Bob C; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

On Mon, Dec 10, 2018 at 12:09:23PM +0000, Feng, Bob C wrote:
> For the "customized deepcopy" and "cache for uni file parser" data,
> you can see the AutoGen is not slower. The whole Build Duration is
> longer because Make Duration is longer while Make Duration time
> depends on the external make, compiler and linker. So it's not the
> patch make the build slow down.
> 
> Yes,  it's not faster either. I think that because the Ovmf platform
> is relatively simple.  From the build tool source code point of
> view, the customized deepcopy will take effect if the platform
> enabled multiple SKU or there are many expressions in metadata file
> to be evaluated. And the "cache for uni file parser" needs there are
> many uni files.  The Ovmf platform looks not a good platform to demo
> the effect of this patch.

But surely we should not introduce patches said to improve performance
when the only data we have available shows that they slow things down?

If the performance data is not representative, then it is worthless.

Don't get me wrong - if you say "and for this secret platform I can't
share with you, it improves build performance by X", then I may be OK
with a minor slowdown on the platforms I do have available to test, if
X is not minor.

But if the improvement is only theoretical, and we have no evidence
that it helps real platforms, it should not be committed.

Regards,

Leif


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-10 12:35               ` Leif Lindholm
@ 2018-12-11  8:48                 ` Feng, Bob C
  2018-12-12 18:37                   ` Leif Lindholm
  0 siblings, 1 reply; 18+ messages in thread
From: Feng, Bob C @ 2018-12-11  8:48 UTC (permalink / raw)
  To: Leif Lindholm; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

Hi Leif,

I understand your concern.  

I collected another performance data set based on open source MinKabylake platform and updated the BZ https://bugzilla.tianocore.org/show_bug.cgi?id=1288. The data looks better than Ovmf. It enabled multiple SKU.

Before I sent those patch, I did verify them on intel real platforms. It improves the build performance. But it's not convenient to share those data.

Thanks,
Bob

-----Original Message-----
From: Leif Lindholm [mailto:leif.lindholm@linaro.org] 
Sent: Monday, December 10, 2018 8:36 PM
To: Feng, Bob C <bob.c.feng@intel.com>
Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation

On Mon, Dec 10, 2018 at 12:09:23PM +0000, Feng, Bob C wrote:
> For the "customized deepcopy" and "cache for uni file parser" data, 
> you can see the AutoGen is not slower. The whole Build Duration is 
> longer because Make Duration is longer while Make Duration time 
> depends on the external make, compiler and linker. So it's not the 
> patch make the build slow down.
> 
> Yes,  it's not faster either. I think that because the Ovmf platform 
> is relatively simple.  From the build tool source code point of view, 
> the customized deepcopy will take effect if the platform enabled 
> multiple SKU or there are many expressions in metadata file to be 
> evaluated. And the "cache for uni file parser" needs there are many 
> uni files.  The Ovmf platform looks not a good platform to demo the 
> effect of this patch.

But surely we should not introduce patches said to improve performance when the only data we have available shows that they slow things down?

If the performance data is not representative, then it is worthless.

Don't get me wrong - if you say "and for this secret platform I can't share with you, it improves build performance by X", then I may be OK with a minor slowdown on the platforms I do have available to test, if X is not minor.

But if the improvement is only theoretical, and we have no evidence that it helps real platforms, it should not be committed.

Regards,

Leif


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-11  8:48                 ` Feng, Bob C
@ 2018-12-12 18:37                   ` Leif Lindholm
  2018-12-13  1:50                     ` Gao, Liming
  0 siblings, 1 reply; 18+ messages in thread
From: Leif Lindholm @ 2018-12-12 18:37 UTC (permalink / raw)
  To: Feng, Bob C; +Cc: edk2-devel@lists.01.org, Carsey, Jaben, Gao, Liming

On Tue, Dec 11, 2018 at 08:48:19AM +0000, Feng, Bob C wrote:
> Hi Leif,
> 
> I understand your concern.  
> 
> I collected another performance data set based on open source
> MinKabylake platform and updated the BZ
> https://bugzilla.tianocore.org/show_bug.cgi?id=1288. The data looks
> better than Ovmf. It enabled multiple SKU.
> 
> Before I sent those patch, I did verify them on intel real
> platforms. It improves the build performance. But it's not
> convenient to share those data.

So, I have two comments on this:
1) How can it be inconvenient to share information on build times? I
   don't even care what the names or codenames for those platforms
   are. If you are unable to tell us why what you have done matters,
   the code changes do not belong in the public tree.
   Clearly having good performance numbers for public platforms is the
   easiest solution for this problem.
2) Submissions of improvements to build system performance should be
   verified building real platforms. It should not be a question of
   "find some other platform to get numbers from once we have improved
   performance for building our confidential platforms".

Regards,

Leif
> 
> Thanks,
> Bob
> 
> -----Original Message-----
> From: Leif Lindholm [mailto:leif.lindholm@linaro.org] 
> Sent: Monday, December 10, 2018 8:36 PM
> To: Feng, Bob C <bob.c.feng@intel.com>
> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> On Mon, Dec 10, 2018 at 12:09:23PM +0000, Feng, Bob C wrote:
> > For the "customized deepcopy" and "cache for uni file parser" data, 
> > you can see the AutoGen is not slower. The whole Build Duration is 
> > longer because Make Duration is longer while Make Duration time 
> > depends on the external make, compiler and linker. So it's not the 
> > patch make the build slow down.
> > 
> > Yes,  it's not faster either. I think that because the Ovmf platform 
> > is relatively simple.  From the build tool source code point of view, 
> > the customized deepcopy will take effect if the platform enabled 
> > multiple SKU or there are many expressions in metadata file to be 
> > evaluated. And the "cache for uni file parser" needs there are many 
> > uni files.  The Ovmf platform looks not a good platform to demo the 
> > effect of this patch.
> 
> But surely we should not introduce patches said to improve performance when the only data we have available shows that they slow things down?
> 
> If the performance data is not representative, then it is worthless.
> 
> Don't get me wrong - if you say "and for this secret platform I can't share with you, it improves build performance by X", then I may be OK with a minor slowdown on the platforms I do have available to test, if X is not minor.
> 
> But if the improvement is only theoretical, and we have no evidence that it helps real platforms, it should not be committed.
> 
> Regards,
> 
> Leif


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-12 18:37                   ` Leif Lindholm
@ 2018-12-13  1:50                     ` Gao, Liming
  2018-12-13 10:08                       ` Leif Lindholm
  0 siblings, 1 reply; 18+ messages in thread
From: Gao, Liming @ 2018-12-13  1:50 UTC (permalink / raw)
  To: Leif Lindholm, Feng, Bob C
  Cc: Carsey, Jaben, edk2-devel@lists.01.org, Gao, Liming

Leif:
  Kabylake platform is the real Intel hardware.  The MinKabylake is the minimal feature of the Kabylake BIOS. Here is MinKabylake BIOS code https://github.com/tianocore/edk2-platforms/tree/devel-MinPlatform
  Bob adds the build performance data of MinKabylake into https://bugzilla.tianocore.org/show_bug.cgi?id=1288. 

The original build performance data:
Build Duration:       00:02:23
AutoGen Duration:     00:00:42
Make Duration:        00:01:12
GenFds Duration:      00:00:27

After apply the patch, clean build performance is reduced from 2:23 to 1:57. So, I think his patch improves build performance. 
Build Duration:       00:01:57
AutoGen Duration:     00:00:23
Make Duration:        00:01:12
GenFds Duration:      00:00:21

Thanks
Liming
>-----Original Message-----
>From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Leif
>Lindholm
>Sent: Thursday, December 13, 2018 2:37 AM
>To: Feng, Bob C <bob.c.feng@intel.com>
>Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org; Gao,
>Liming <liming.gao@intel.com>
>Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
>
>On Tue, Dec 11, 2018 at 08:48:19AM +0000, Feng, Bob C wrote:
>> Hi Leif,
>>
>> I understand your concern.
>>
>> I collected another performance data set based on open source
>> MinKabylake platform and updated the BZ
>> https://bugzilla.tianocore.org/show_bug.cgi?id=1288. The data looks
>> better than Ovmf. It enabled multiple SKU.
>>
>> Before I sent those patch, I did verify them on intel real
>> platforms. It improves the build performance. But it's not
>> convenient to share those data.
>
>So, I have two comments on this:
>1) How can it be inconvenient to share information on build times? I
>   don't even care what the names or codenames for those platforms
>   are. If you are unable to tell us why what you have done matters,
>   the code changes do not belong in the public tree.
>   Clearly having good performance numbers for public platforms is the
>   easiest solution for this problem.
>2) Submissions of improvements to build system performance should be
>   verified building real platforms. It should not be a question of
>   "find some other platform to get numbers from once we have improved
>   performance for building our confidential platforms".
>
>Regards,
>
>Leif
>>
>> Thanks,
>> Bob
>>
>> -----Original Message-----
>> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
>> Sent: Monday, December 10, 2018 8:36 PM
>> To: Feng, Bob C <bob.c.feng@intel.com>
>> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao,
>Liming <liming.gao@intel.com>
>> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
>>
>> On Mon, Dec 10, 2018 at 12:09:23PM +0000, Feng, Bob C wrote:
>> > For the "customized deepcopy" and "cache for uni file parser" data,
>> > you can see the AutoGen is not slower. The whole Build Duration is
>> > longer because Make Duration is longer while Make Duration time
>> > depends on the external make, compiler and linker. So it's not the
>> > patch make the build slow down.
>> >
>> > Yes,  it's not faster either. I think that because the Ovmf platform
>> > is relatively simple.  From the build tool source code point of view,
>> > the customized deepcopy will take effect if the platform enabled
>> > multiple SKU or there are many expressions in metadata file to be
>> > evaluated. And the "cache for uni file parser" needs there are many
>> > uni files.  The Ovmf platform looks not a good platform to demo the
>> > effect of this patch.
>>
>> But surely we should not introduce patches said to improve performance
>when the only data we have available shows that they slow things down?
>>
>> If the performance data is not representative, then it is worthless.
>>
>> Don't get me wrong - if you say "and for this secret platform I can't share
>with you, it improves build performance by X", then I may be OK with a minor
>slowdown on the platforms I do have available to test, if X is not minor.
>>
>> But if the improvement is only theoretical, and we have no evidence that it
>helps real platforms, it should not be committed.
>>
>> Regards,
>>
>> Leif
>_______________________________________________
>edk2-devel mailing list
>edk2-devel@lists.01.org
>https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-13  1:50                     ` Gao, Liming
@ 2018-12-13 10:08                       ` Leif Lindholm
  2018-12-13 14:01                         ` Gao, Liming
  0 siblings, 1 reply; 18+ messages in thread
From: Leif Lindholm @ 2018-12-13 10:08 UTC (permalink / raw)
  To: Gao, Liming; +Cc: Feng, Bob C, Carsey, Jaben, edk2-devel@lists.01.org

Hi Liming,

Yes, this is fine.
But for future submissions, I would like for this sort of information
to be provided at the time of posting.

Not the full breakdown, but "reduceces build time of platform XXX on
hardware YYY by A%/B seconds".

Ideally, more than one platform and more than one hardware should be
provided, but at least during this initial improvement phase I'm also
happy for the assumption being that unless someone else complains,
it's fine on others.

Regards,

Leif

On Thu, Dec 13, 2018 at 01:50:35AM +0000, Gao, Liming wrote:
> Leif:
>   Kabylake platform is the real Intel hardware.  The MinKabylake is the minimal feature of the Kabylake BIOS. Here is MinKabylake BIOS code https://github.com/tianocore/edk2-platforms/tree/devel-MinPlatform
>   Bob adds the build performance data of MinKabylake into https://bugzilla.tianocore.org/show_bug.cgi?id=1288. 
> 
> The original build performance data:
> Build Duration:       00:02:23
> AutoGen Duration:     00:00:42
> Make Duration:        00:01:12
> GenFds Duration:      00:00:27
> 
> After apply the patch, clean build performance is reduced from 2:23 to 1:57. So, I think his patch improves build performance. 
> Build Duration:       00:01:57
> AutoGen Duration:     00:00:23
> Make Duration:        00:01:12
> GenFds Duration:      00:00:21
> 
> Thanks
> Liming
> >-----Original Message-----
> >From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Leif
> >Lindholm
> >Sent: Thursday, December 13, 2018 2:37 AM
> >To: Feng, Bob C <bob.c.feng@intel.com>
> >Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org; Gao,
> >Liming <liming.gao@intel.com>
> >Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> >
> >On Tue, Dec 11, 2018 at 08:48:19AM +0000, Feng, Bob C wrote:
> >> Hi Leif,
> >>
> >> I understand your concern.
> >>
> >> I collected another performance data set based on open source
> >> MinKabylake platform and updated the BZ
> >> https://bugzilla.tianocore.org/show_bug.cgi?id=1288. The data looks
> >> better than Ovmf. It enabled multiple SKU.
> >>
> >> Before I sent those patch, I did verify them on intel real
> >> platforms. It improves the build performance. But it's not
> >> convenient to share those data.
> >
> >So, I have two comments on this:
> >1) How can it be inconvenient to share information on build times? I
> >   don't even care what the names or codenames for those platforms
> >   are. If you are unable to tell us why what you have done matters,
> >   the code changes do not belong in the public tree.
> >   Clearly having good performance numbers for public platforms is the
> >   easiest solution for this problem.
> >2) Submissions of improvements to build system performance should be
> >   verified building real platforms. It should not be a question of
> >   "find some other platform to get numbers from once we have improved
> >   performance for building our confidential platforms".
> >
> >Regards,
> >
> >Leif
> >>
> >> Thanks,
> >> Bob
> >>
> >> -----Original Message-----
> >> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> >> Sent: Monday, December 10, 2018 8:36 PM
> >> To: Feng, Bob C <bob.c.feng@intel.com>
> >> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao,
> >Liming <liming.gao@intel.com>
> >> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> >>
> >> On Mon, Dec 10, 2018 at 12:09:23PM +0000, Feng, Bob C wrote:
> >> > For the "customized deepcopy" and "cache for uni file parser" data,
> >> > you can see the AutoGen is not slower. The whole Build Duration is
> >> > longer because Make Duration is longer while Make Duration time
> >> > depends on the external make, compiler and linker. So it's not the
> >> > patch make the build slow down.
> >> >
> >> > Yes,  it's not faster either. I think that because the Ovmf platform
> >> > is relatively simple.  From the build tool source code point of view,
> >> > the customized deepcopy will take effect if the platform enabled
> >> > multiple SKU or there are many expressions in metadata file to be
> >> > evaluated. And the "cache for uni file parser" needs there are many
> >> > uni files.  The Ovmf platform looks not a good platform to demo the
> >> > effect of this patch.
> >>
> >> But surely we should not introduce patches said to improve performance
> >when the only data we have available shows that they slow things down?
> >>
> >> If the performance data is not representative, then it is worthless.
> >>
> >> Don't get me wrong - if you say "and for this secret platform I can't share
> >with you, it improves build performance by X", then I may be OK with a minor
> >slowdown on the platforms I do have available to test, if X is not minor.
> >>
> >> But if the improvement is only theoretical, and we have no evidence that it
> >helps real platforms, it should not be committed.
> >>
> >> Regards,
> >>
> >> Leif
> >_______________________________________________
> >edk2-devel mailing list
> >edk2-devel@lists.01.org
> >https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [Patch] BaseTools: Optimize string concatenation
  2018-12-13 10:08                       ` Leif Lindholm
@ 2018-12-13 14:01                         ` Gao, Liming
  0 siblings, 0 replies; 18+ messages in thread
From: Gao, Liming @ 2018-12-13 14:01 UTC (permalink / raw)
  To: Leif Lindholm; +Cc: Carsey, Jaben, edk2-devel@lists.01.org

Leif:
  Agree your point to prepare the data before the patch instead of after the patch. 

Thanks
Liming
> -----Original Message-----
> From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Leif Lindholm
> Sent: Thursday, December 13, 2018 6:09 PM
> To: Gao, Liming <liming.gao@intel.com>
> Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org
> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> 
> Hi Liming,
> 
> Yes, this is fine.
> But for future submissions, I would like for this sort of information
> to be provided at the time of posting.
> 
> Not the full breakdown, but "reduceces build time of platform XXX on
> hardware YYY by A%/B seconds".
> 
> Ideally, more than one platform and more than one hardware should be
> provided, but at least during this initial improvement phase I'm also
> happy for the assumption being that unless someone else complains,
> it's fine on others.
> 
> Regards,
> 
> Leif
> 
> On Thu, Dec 13, 2018 at 01:50:35AM +0000, Gao, Liming wrote:
> > Leif:
> >   Kabylake platform is the real Intel hardware.  The MinKabylake is the minimal feature of the Kabylake BIOS. Here is MinKabylake
> BIOS code https://github.com/tianocore/edk2-platforms/tree/devel-MinPlatform
> >   Bob adds the build performance data of MinKabylake into https://bugzilla.tianocore.org/show_bug.cgi?id=1288.
> >
> > The original build performance data:
> > Build Duration:       00:02:23
> > AutoGen Duration:     00:00:42
> > Make Duration:        00:01:12
> > GenFds Duration:      00:00:27
> >
> > After apply the patch, clean build performance is reduced from 2:23 to 1:57. So, I think his patch improves build performance.
> > Build Duration:       00:01:57
> > AutoGen Duration:     00:00:23
> > Make Duration:        00:01:12
> > GenFds Duration:      00:00:21
> >
> > Thanks
> > Liming
> > >-----Original Message-----
> > >From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Leif
> > >Lindholm
> > >Sent: Thursday, December 13, 2018 2:37 AM
> > >To: Feng, Bob C <bob.c.feng@intel.com>
> > >Cc: Carsey, Jaben <jaben.carsey@intel.com>; edk2-devel@lists.01.org; Gao,
> > >Liming <liming.gao@intel.com>
> > >Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> > >
> > >On Tue, Dec 11, 2018 at 08:48:19AM +0000, Feng, Bob C wrote:
> > >> Hi Leif,
> > >>
> > >> I understand your concern.
> > >>
> > >> I collected another performance data set based on open source
> > >> MinKabylake platform and updated the BZ
> > >> https://bugzilla.tianocore.org/show_bug.cgi?id=1288. The data looks
> > >> better than Ovmf. It enabled multiple SKU.
> > >>
> > >> Before I sent those patch, I did verify them on intel real
> > >> platforms. It improves the build performance. But it's not
> > >> convenient to share those data.
> > >
> > >So, I have two comments on this:
> > >1) How can it be inconvenient to share information on build times? I
> > >   don't even care what the names or codenames for those platforms
> > >   are. If you are unable to tell us why what you have done matters,
> > >   the code changes do not belong in the public tree.
> > >   Clearly having good performance numbers for public platforms is the
> > >   easiest solution for this problem.
> > >2) Submissions of improvements to build system performance should be
> > >   verified building real platforms. It should not be a question of
> > >   "find some other platform to get numbers from once we have improved
> > >   performance for building our confidential platforms".
> > >
> > >Regards,
> > >
> > >Leif
> > >>
> > >> Thanks,
> > >> Bob
> > >>
> > >> -----Original Message-----
> > >> From: Leif Lindholm [mailto:leif.lindholm@linaro.org]
> > >> Sent: Monday, December 10, 2018 8:36 PM
> > >> To: Feng, Bob C <bob.c.feng@intel.com>
> > >> Cc: edk2-devel@lists.01.org; Carsey, Jaben <jaben.carsey@intel.com>; Gao,
> > >Liming <liming.gao@intel.com>
> > >> Subject: Re: [edk2] [Patch] BaseTools: Optimize string concatenation
> > >>
> > >> On Mon, Dec 10, 2018 at 12:09:23PM +0000, Feng, Bob C wrote:
> > >> > For the "customized deepcopy" and "cache for uni file parser" data,
> > >> > you can see the AutoGen is not slower. The whole Build Duration is
> > >> > longer because Make Duration is longer while Make Duration time
> > >> > depends on the external make, compiler and linker. So it's not the
> > >> > patch make the build slow down.
> > >> >
> > >> > Yes,  it's not faster either. I think that because the Ovmf platform
> > >> > is relatively simple.  From the build tool source code point of view,
> > >> > the customized deepcopy will take effect if the platform enabled
> > >> > multiple SKU or there are many expressions in metadata file to be
> > >> > evaluated. And the "cache for uni file parser" needs there are many
> > >> > uni files.  The Ovmf platform looks not a good platform to demo the
> > >> > effect of this patch.
> > >>
> > >> But surely we should not introduce patches said to improve performance
> > >when the only data we have available shows that they slow things down?
> > >>
> > >> If the performance data is not representative, then it is worthless.
> > >>
> > >> Don't get me wrong - if you say "and for this secret platform I can't share
> > >with you, it improves build performance by X", then I may be OK with a minor
> > >slowdown on the platforms I do have available to test, if X is not minor.
> > >>
> > >> But if the improvement is only theoretical, and we have no evidence that it
> > >helps real platforms, it should not be committed.
> > >>
> > >> Regards,
> > >>
> > >> Leif
> > >_______________________________________________
> > >edk2-devel mailing list
> > >edk2-devel@lists.01.org
> > >https://lists.01.org/mailman/listinfo/edk2-devel
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2018-12-13 14:01 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-11-08 10:16 [Patch] BaseTools: Optimize string concatenation BobCF
2018-11-08 15:26 ` Carsey, Jaben
2018-11-08 16:40 ` Kinney, Michael D
2018-11-09 14:16   ` Gao, Liming
2018-11-09 17:01     ` Kinney, Michael D
2018-11-08 16:52 ` Leif Lindholm
2018-11-09  3:25   ` Feng, Bob C
2018-11-09 11:48     ` Leif Lindholm
2018-11-11  0:41       ` Feng, Bob C
2018-12-05  2:51         ` Feng, Bob C
2018-12-10 10:47           ` Leif Lindholm
2018-12-10 12:09             ` Feng, Bob C
2018-12-10 12:35               ` Leif Lindholm
2018-12-11  8:48                 ` Feng, Bob C
2018-12-12 18:37                   ` Leif Lindholm
2018-12-13  1:50                     ` Gao, Liming
2018-12-13 10:08                       ` Leif Lindholm
2018-12-13 14:01                         ` Gao, Liming

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox