diff --git a/Source/DelphiAST.Classes.pas b/Source/DelphiAST.Classes.pas index fbccda28..b084eab5 100644 --- a/Source/DelphiAST.Classes.pas +++ b/Source/DelphiAST.Classes.pas @@ -5,7 +5,8 @@ interface uses - SysUtils, Generics.Collections, SimpleParser.Lexer.Types, DelphiAST.Consts; + SysUtils, Generics.Collections, SimpleParser.Lexer.Types, DelphiAST.Consts + {$ifdef USESTRINGCACHE}, SimpleParser.StringCache{$endif}; type EParserException = class(Exception) @@ -19,8 +20,14 @@ EParserException = class(Exception) property Line: Integer read FLine; property Col: Integer read FCol; end; + + {$ifdef USESTRINGCACHE} + TAttributeEntryValue = TStringId; + {$else} + TAttributeEntryValue = string; + {$endif} - TAttributeEntry = TPair; + TAttributeEntry = TPair; PAttributeEntry = ^TAttributeEntry; TSyntaxNodeClass = class of TSyntaxNode; @@ -32,6 +39,8 @@ TSyntaxNode = class function GetHasChildren: Boolean; function GetHasAttributes: Boolean; function TryGetAttributeEntry(const Key: TAttributeName; var AttributeEntry: PAttributeEntry): boolean; + procedure SetAttributeInternal(const Key: TAttributeName; const Value: TAttributeEntryValue); + {$ifdef USESTRINGCACHE}procedure SetAttribute(const Key: TAttributeName; const Value: TStringId); overload;{$endif} protected FAttributes: TArray; FChildNodes: TArray; @@ -45,7 +54,7 @@ TSyntaxNode = class function GetAttribute(const Key: TAttributeName): string; function HasAttribute(const Key: TAttributeName): Boolean; - procedure SetAttribute(const Key: TAttributeName; const Value: string); + procedure SetAttribute(const Key: TAttributeName; const Value: string); {$ifdef USESTRINGCACHE}overload;{$endif} procedure ClearAttributes; function AddChild(Node: TSyntaxNode): TSyntaxNode; overload; @@ -80,11 +89,13 @@ TCompoundSyntaxNode = class(TSyntaxNode) TValuedSyntaxNode = class(TSyntaxNode) private - FValue: string; + FValue: {$ifdef USESTRINGCACHE}TStringId{$else}string{$endif}; + function GetValue: string; + procedure SetValue(const Value: string); public function Clone: TSyntaxNode; override; - property Value: string read FValue write FValue; + property Value: string read GetValue write SetValue; end; TCommentNode = class(TSyntaxNode) @@ -360,6 +371,27 @@ class procedure TExpressionTools.RawNodeListToTree(RawParentNode: TSyntaxNode; R { TSyntaxNode } procedure TSyntaxNode.SetAttribute(const Key: TAttributeName; const Value: string); +{$ifdef USESTRINGCACHE} + var + NewValue : TAttributeEntryValue; +{$endif} +begin + {$ifdef USESTRINGCACHE} + NewValue := TStringCache.Instance.Add(Value); + SetAttributeInternal(Key, NewValue); + {$else} + SetAttributeInternal(Key, Value); + {$endif} +end; + +{$ifdef USESTRINGCACHE} + procedure TSyntaxNode.SetAttribute(const Key: TAttributeName; const Value: TStringId); + begin + SetAttributeInternal(Key, Value); + end; +{$endif} + +procedure TSyntaxNode.SetAttributeInternal(const Key: TAttributeName; const Value: TAttributeEntryValue); var AttributeEntry: PAttributeEntry; NewAttributeEntry: TAttributeEntry; @@ -409,7 +441,7 @@ function TSyntaxNode.AddChild(Typ: TSyntaxNodeType): TSyntaxNode; function TSyntaxNode.Clone: TSyntaxNode; var ChildNode: TSyntaxNode; - Attr: TPair; + Attr: TPair; begin Result := TSyntaxNodeClass(Self.ClassType).Create(FTyp); @@ -431,6 +463,7 @@ constructor TSyntaxNode.Create(Typ: TSyntaxNodeType); SetLength(FAttributes, 0); SetLength(FChildNodes, 0); FParentNode := nil; + {$ifdef USESTRINGCACHE}TStringCache.Instance.IncRef;{$endif} end; procedure TSyntaxNode.ExtractChild(Node: TSyntaxNode); @@ -463,6 +496,8 @@ destructor TSyntaxNode.Destroy; var i: integer; begin + {$ifdef USESTRINGCACHE}TStringCache.Instance.DecRef;{$endif} + for i := 0 to Length(FChildNodes) - 1 do FChildNodes[i].Free; SetLength(FChildNodes, 0); @@ -489,7 +524,11 @@ function TSyntaxNode.GetAttribute(const Key: TAttributeName): string; AttributeEntry: PAttributeEntry; begin if TryGetAttributeEntry(Key, AttributeEntry) then - Result := AttributeEntry.Value + {$ifdef USESTRINGCACHE} + Result := TStringCache.Instance.Get(AttributeEntry.Value) + {$else} + Result := AttributeEntry.Value + {$endif} else Result := ''; end; @@ -535,6 +574,24 @@ function TValuedSyntaxNode.Clone: TSyntaxNode; TValuedSyntaxNode(Result).Value := Self.Value; end; +function TValuedSyntaxNode.GetValue: string; +begin + {$ifdef USESTRINGCACHE} + Result := TStringCache.Instance.Get(FValue); + {$else} + Result := FValue; + {$endif} +end; + +procedure TValuedSyntaxNode.SetValue(const Value: string); +begin + {$ifdef USESTRINGCACHE} + FValue := TStringCache.Instance.Add(Value); + {$else} + FValue := Value; + {$endif} +end; + { TCommentNode } function TCommentNode.Clone: TSyntaxNode; diff --git a/Source/DelphiAST.Writer.pas b/Source/DelphiAST.Writer.pas index 144fdc18..aaae77b8 100644 --- a/Source/DelphiAST.Writer.pas +++ b/Source/DelphiAST.Writer.pas @@ -21,7 +21,7 @@ TSyntaxTreeWriter = class implementation uses - Generics.Collections, DelphiAST.Consts; + Generics.Collections, DelphiAST.Consts, SimpleParser.StringCache; {$I SimpleParser.inc} {$IFDEF D18_NEWER} @@ -64,7 +64,7 @@ class procedure TSyntaxTreeWriter.NodeToXML(const Builder: TStringBuilder; var HasChildren: Boolean; NewIndent: string; - Attr: TPair; + Attr: TPair; ChildNode: TSyntaxNode; begin HasChildren := Node.HasChildren; @@ -94,7 +94,13 @@ class procedure TSyntaxTreeWriter.NodeToXML(const Builder: TStringBuilder; Builder.Append(' value="' + XMLEncode(TValuedSyntaxNode(Node).Value) + '"'); for Attr in Node.Attributes do - Builder.Append(' ' + AttributeNameToStr(Attr.Key) + '="' + XMLEncode(Attr.Value) + '"'); + Builder.Append(' ' + AttributeNameToStr(Attr.Key) + '="' + {$ifdef USESTRINGCACHE} + + XMLEncode(TStringCache.Instance.Get(Attr.Value)) + {$else} + + XMLEncode(Attr.Value) + {$endif} + + '"'); if HasChildren then Builder.Append('>') else diff --git a/Source/SimpleParser/SimpleParser.Lexer.pas b/Source/SimpleParser/SimpleParser.Lexer.pas index bcfbcbec..829118b7 100644 --- a/Source/SimpleParser/SimpleParser.Lexer.pas +++ b/Source/SimpleParser/SimpleParser.Lexer.pas @@ -397,7 +397,8 @@ TmwPasLex = class(TmwBasePasLex) implementation uses - StrUtils; + StrUtils + {$ifdef USESTRINGCACHE}, SimpleParser.StringCache{$endif}; type TmwPasLexExpressionEvaluation = (leeNone, leeAnd, leeOr); @@ -1313,10 +1314,18 @@ constructor TmwBasePasLex.Create; New(FBuffer); FillChar(FBuffer^, SizeOf(TBufferRec), 0); + + {$ifdef USESTRINGCACHE} + TStringCache.Instance.IncRef; + {$endif} end; destructor TmwBasePasLex.Destroy; begin + {$ifdef USESTRINGCACHE} + TStringCache.Instance.DecRef; + {$endif} + if not FBuffer.SharedBuffer then FreeMem(FBuffer.Buf); @@ -2224,7 +2233,11 @@ function TmwBasePasLex.GetIsSpace: Boolean; function TmwBasePasLex.GetToken: string; begin - SetString(Result, (FBuffer.Buf + FTokenPos), GetTokenLen); + {$ifdef USESTRINGCACHE} + Result := TStringCache.Instance.AddAndGet(FBuffer.Buf + FTokenPos, GetTokenLen); + {$else} + SetString(Result, (FBuffer.Buf + FTokenPos), GetTokenLen); + {$endif} end; function TmwBasePasLex.GetTokenLen: Integer; diff --git a/Source/SimpleParser/SimpleParser.StringCache.pas b/Source/SimpleParser/SimpleParser.StringCache.pas new file mode 100644 index 00000000..21547316 --- /dev/null +++ b/Source/SimpleParser/SimpleParser.StringCache.pas @@ -0,0 +1,331 @@ +unit SimpleParser.StringCache; + +{ + String cache: provides a global class to keep unique string instances, which + are then referred to by an ID. There are methods to then get a string given + an ID. This can greatly reduce the number of strings in memory, since all + strings with the same content will be the same actual string, stored in the + cache. + + Originally written by David Millington: vintagedave@gmail.com or dave@parnassus.co + Code donated to the DelphiAST project, April 2016. +} + +interface + +uses + System.Generics.Defaults, System.Generics.Collections, SyncObjs; + +// Use STRINGCACHE_THREADSAFE to ensure one instance can be accessed by multiple +// threads at once. This prevents clearing - it keeps all added elements for the +// life of the instance (life of the program if using TStringCache.Instance) +// and locks around adding / getting items. +// This is one by default +{$define STRINGCACHE_THREADSAFE} + +type + TStringId = type NativeInt; + + TStringCache = class + type + TStringRec = class + strict private + FValue : string; + FUsageCount : NativeUInt; + public + constructor Create(const AValue : string); + procedure IncUsageCount; + property UsageCount : NativeUInt read FUsageCount; + property Value : string read FValue; + end; + private + type + TStringRecValueEqualityComparer = class(TEqualityComparer) + private + FStringComparer : IEqualityComparer; + public + constructor Create(); + function Equals(const Left, Right: TStringRec): Boolean; overload; override; + function GetHashCode(const Value: TStringRec): Integer; overload; override; + end; + TStringRecUsageComparer = class(TInterfacedObject, IComparer) + function Compare(const Left, Right: TStringRec): Integer; + end; + strict private + FStringToId : TDictionary; + FRefCount : NativeInt; + {$ifdef STRINGCACHE_THREADSAFE} + FLock : TCriticalSection; + {$else} + // If threadsafe, always persistent, so only allow it to be changed when not threadsafe + FIsPersistent : Boolean; + {$endif} + + class var FInstance : TStringCache; + class constructor ClassCreate; + class destructor ClassDestroy; + + procedure Lock; inline; + procedure Unlock; inline; + private + FIdToString : TList; + function GetIsPersistent: Boolean; + procedure SetIsPersistent(const Value: Boolean); // ID is index + public + constructor Create; + destructor Destroy; override; + + function Add(const Value : string) : TStringId; + function AddAndGet(const P : PChar; const Length : Integer) : string; + function Get(const ID : TStringId) : string; + procedure Clear(const OnDestruction : Boolean = false); + procedure ByUsage(InOrder : TList); + + procedure IncRef; + procedure DecRef; + + property Persistent : Boolean read GetIsPersistent write SetIsPersistent; + class property Instance : TStringCache read FInstance; + end; + +implementation + +uses + SysUtils, Types; + +{ TStringCache.TStringRecValueEqualityComparer } + +constructor TStringCache.TStringRecValueEqualityComparer.Create; +begin + inherited Create(); + FStringComparer := TEqualityComparer.Default; +end; + +function TStringCache.TStringRecValueEqualityComparer.Equals(const Left, + Right: TStringRec): Boolean; +begin + // Compare by the string it holds only + Result := FStringComparer.Equals(Left.Value, Right.Value); +end; + +function TStringCache.TStringRecValueEqualityComparer.GetHashCode( + const Value: TStringRec): Integer; +begin + // Compare by the string it holds only + Result := FStringComparer.GetHashCode(Value.Value); +end; + +{ TStringCache.TStringRecUsageComparer } + +function TStringCache.TStringRecUsageComparer.Compare(const Left, + Right: TStringRec): Integer; +begin + if Left.UsageCount < Right.UsageCount then + Exit(LessThanValue) + else if Left.UsageCount > Right.UsageCount then + Exit(GreaterThanValue) + else // Usage is the same, sort by string + Exit(TComparer.Default.Compare(Left.Value, Right.Value)); +end; + +{ TStringCache } + +class constructor TStringCache.ClassCreate; +begin + FInstance := TStringCache.Create; +end; + +class destructor TStringCache.ClassDestroy; +begin + FInstance.Free; +end; + +constructor TStringCache.Create; +begin + inherited; + FRefCount := 0; + {$ifdef STRINGCACHE_THREADSAFE} + FLock := TCriticalSection.Create; + {$else} + FIsPersistent := false; // Clear the cache when no longer needed + {$endif} + FStringToId := TDictionary.Create( + TStringCache.TStringRecValueEqualityComparer.Create); + FIdToString := TList.Create; + + Add(''); // Empty string is always item 0 +end; + +destructor TStringCache.Destroy; +begin + assert(FRefCount = 0, 'String cache destroyed with live objects still relying on it'); + Clear(true); + FStringToId.Free; + FIdToString.Free; + {$ifdef STRINGCACHE_THREADSAFE} + FLock.Free; + {$endif} + inherited; +end; + +function TStringCache.Add(const Value: string): TStringId; +var + Item : TStringRec; +begin + Result := 0; + Item := TStringRec.Create(Value); + + Lock; + try + if FStringToId.TryGetValue(Item, Result) then begin + // Already exists. Increment the usage count of the existing one, and return + FIdToString[Result].IncUsageCount; + Item.Free; // Already exists, Item was search key only + Exit; + end; + + // Item does not yet exist + Result := FIdToString.Add(Item); + FStringToId.Add(Item, Result); + finally + Unlock; + end; +end; + +function TStringCache.AddAndGet(const P : PChar; const Length : Integer) : string; +var + SearchStr : string; +begin + SetString(SearchStr, P, Length); + + Lock; // Will enter in Get and Add too, but a CS can be entered multiple times + try + Result := Get(Add(SearchStr)); + finally + Unlock; + end; +end; + +function TStringCache.Get(const ID: TStringId): string; +begin + Lock; + try + if ID < FIdToString.Count then + Exit(FIdToString[ID].Value) + else + raise Exception.Create(Format('String cache entry with ID %d does not exist', [ID])); + finally + Unlock; + end; +end; + +procedure TStringCache.Clear(const OnDestruction : Boolean); +var + I : Integer; +begin + // This doesn't need a lock. When threadsafe, never cleared except on destruction + + if FRefCount <> 0 then + raise Exception.Create(Format('Clearing the string cache while objects still rely on it (%d)', [FRefCount])); + + // One instance of TStringRec, but stored in two lists. Free from only one + for I := 0 to Pred(FIdToString.Count) do + FIdToString[I].Free; + + FStringToId.Clear; + FIdToString.Clear; + + if not OnDestruction then begin + // Add emtpy string - it's always item 0 - unless this is being called as + // part of destruction + Add(''); + assert(Get(0) = ''); + end; +end; + +procedure TStringCache.ByUsage(InOrder: TList); +begin + Lock; + try + InOrder.InsertRange(0, FIdToString); + InOrder.Sort(TStringCache.TStringRecUsageComparer.Create); + finally + Unlock; + end; +end; + +function TStringCache.GetIsPersistent: Boolean; +begin + {$ifdef STRINGCACHE_THREADSAFE} + Result := true; // Never clears + {$else} + Result := FIsPersistent; + {$endif} +end; + +procedure TStringCache.SetIsPersistent(const Value: Boolean); +begin + // If threadsafe, always persistent (never clears) so don't set anything + {$ifndef STRINGCACHE_THREADSAFE} + FIsPersistent := Value; + {$endif} +end; + +procedure TStringCache.IncRef; +begin + // Keep a count of how many objects are using the string cache. This lets it + // clear itself when the last one is freed - ie, free all the strings when + // they are no longer needed. (The alternative, controlled by Persistent, + // is to keep them - ie make the cache persistent over multiple runs - useful + // for parsing the same or similar files over and over.) + AtomicIncrement(FRefCount); +end; + +procedure TStringCache.DecRef; +begin + if AtomicDecrement(FRefCount) < 0 then + raise Exception.Create('String cache refcount cannot be decremented below zero'); + + // When threadsafe, synchronizing clearing while ensuring the refcount is 0 + // (ie an addref dosn't occur while clearing) is hard without locking around + // IncRef and DecRef, which is expensive. So just don't clear. + {$ifndef STRINGCACHE_THREADSAFE} + // Unless want to keep the strings around for next parse, clear now nothing is + // using any of them. + if (FRefCount = 0) and (not Persistent) then + Clear; + {$endif} +end; + +procedure TStringCache.Lock; +begin + // If not threadsafe, nothing to do here + {$ifdef STRINGCACHE_THREADSAFE} + FLock.Acquire; + {$endif} +end; + +procedure TStringCache.Unlock; +begin + // If not threadsafe, nothing to do here + {$ifdef STRINGCACHE_THREADSAFE} + FLock.Release; + {$endif} +end; + + +{ TStringCache.TStringRec } + +constructor TStringCache.TStringRec.Create(const AValue: string); +begin + inherited Create; + FValue := AValue; + FUsageCount := 1; +end; + +procedure TStringCache.TStringRec.IncUsageCount; +begin + Inc(FUsageCount); +end; + +end.