-
Notifications
You must be signed in to change notification settings - Fork 995
transform: elide allocations from byte slice equality tests #5067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -178,3 +178,108 @@ func OptimizeReflectImplements(mod llvm.Module) { | |
| call.EraseFromParentAsInstruction() | ||
| } | ||
| } | ||
|
|
||
| // OptimizeStringFromBytes removes allocations for byte slice equality | ||
| // checks that use temporary strings. In particular, `bytes.Equal` allocates | ||
| // two such strings: | ||
| // | ||
| // func Equal(a, b []byte) bool { | ||
| // return string(a) == string(b) | ||
| // } | ||
| func OptimizeStringFromBytes(mod llvm.Module) { | ||
| stringFromBytes := mod.NamedFunction("runtime.stringFromBytes") | ||
| if stringFromBytes.IsNil() { | ||
| return | ||
| } | ||
| stringEqual := mod.NamedFunction("runtime.stringEqual") | ||
| if stringEqual.IsNil() { | ||
| return | ||
| } | ||
|
|
||
| uses: | ||
| for _, call := range getUses(stringFromBytes) { | ||
| sliceptr := call.Operand(0) | ||
| slicelen := call.Operand(1) | ||
| // Collect all uses of the slice pointer while replacing | ||
| // uses of the string length. | ||
| uses := make(map[llvm.Value]bool) | ||
| if !collectStringFromBytesUses(uses, slicelen, call) { | ||
| continue | ||
| } | ||
| inst := call | ||
| found := 0 | ||
| // Scan instructions that follow the stringFromBytes call to | ||
| // account for all uses. Bail if any instruction may mutate the | ||
| // slice storage. | ||
| for len(uses) > found { | ||
| inst = llvm.NextInstruction(inst) | ||
| if inst.IsNil() { | ||
| // There are uses beyond this basic block. | ||
| continue uses | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find this to be hard to read. Can you refactor this code to avoid the |
||
| } | ||
| switch { | ||
| case !inst.IsACallInst().IsNil(): | ||
| if inst.CalledValue() != stringEqual { | ||
| // The called function is not runtime.stringEqual | ||
| // and may mutate the slice. | ||
| continue uses | ||
| } | ||
| case !inst.IsAGetElementPtrInst().IsNil(), | ||
| !inst.IsALoadInst().IsNil(), | ||
| !inst.IsAExtractValueInst().IsNil(): | ||
| // Read-only instructions. | ||
| default: | ||
| // Instruction may perform a store on the slice. | ||
| continue uses | ||
| } | ||
| if _, ok := uses[inst]; ok { | ||
| found++ | ||
| } | ||
| } | ||
| // At this point, all instructions between the stringFromBytes call | ||
| // and its uses are known not to mutate the slice storage. Replace | ||
| // all string pointer uses with the slice pointer and get rid of | ||
| // the call. | ||
| for use, repl := range uses { | ||
| if repl { | ||
| use.ReplaceAllUsesWith(sliceptr) | ||
| use.EraseFromParentAsInstruction() | ||
| } | ||
| } | ||
| call.EraseFromParentAsInstruction() | ||
| } | ||
| } | ||
|
|
||
| // collectStringFromBytesUses collects the string pointer uses, while replacing string | ||
| // length uses with the equivalent slice length. | ||
| func collectStringFromBytesUses(uses map[llvm.Value]bool, slicelen, v llvm.Value) bool { | ||
| if v.IsNil() { | ||
| return true | ||
| } | ||
| for _, use := range getUses(v) { | ||
| switch { | ||
| case !use.IsAExtractValueInst().IsNil(): | ||
| switch use.Type().TypeKind() { | ||
| case llvm.IntegerTypeKind: | ||
| // String length can always safely be replaced with slice length. | ||
| use.ReplaceAllUsesWith(slicelen) | ||
| use.EraseFromParentAsInstruction() | ||
| case llvm.PointerTypeKind: | ||
| if !collectStringFromBytesUses(uses, slicelen, use) { | ||
| return false | ||
| } | ||
| // Record the use as replaceable with the slice pointer. | ||
| uses[use] = true | ||
| default: | ||
| return false | ||
| } | ||
| case !use.IsACallInst().IsNil(): | ||
| // Record the use, but don't replace it. | ||
| uses[use] = false | ||
| default: | ||
| // Give up. | ||
| return false | ||
| } | ||
| } | ||
| return true | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be really nice to have some tests that are easier to read than LLVM IR. Could you add some tests like transform/testdata/allocs2.go? (This will probably require some refactoring to not duplicate code). This will also show that the optimization works for real Go code, instead of handcrafted IR (that might go out of date). |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64--linux" | ||
|
|
||
| @str = constant [6 x i8] c"foobar" | ||
|
|
||
| declare { ptr, i64, i64 } @runtime.stringToBytes(ptr, i64) | ||
|
|
||
| declare { ptr, i64 } @runtime.stringFromBytes(ptr, i64, i64) | ||
|
|
||
| declare i1 @runtime.stringEqual(ptr nocapture, i64, ptr nocapture, i64) | ||
|
|
||
| declare void @maybeSideEffect() | ||
|
|
||
| declare void @readString(ptr nocapture, i64) | ||
|
|
||
| define void @testReadOnly() { | ||
| entry: | ||
| ; Build byte slice. | ||
| %0 = call fastcc { ptr, i64, i64 } @runtime.stringToBytes(ptr @str, i64 6) | ||
| %1 = extractvalue { ptr, i64, i64 } %0, 0 | ||
| %2 = extractvalue { ptr, i64, i64 } %0, 1 | ||
| %3 = extractvalue { ptr, i64, i64 } %0, 2 | ||
|
|
||
| ; Test that a side-effect free string equality check can optimize the stringFromBytes | ||
| ; call away. | ||
| %4 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3) | ||
| %5 = extractvalue { ptr, i64, i64 } %4, 0 | ||
| %6 = extractvalue { ptr, i64, i64 } %4, 1 | ||
| call fastcc i1 @runtime.stringEqual(ptr %5, i64 %6, ptr %5, i64 %6) | ||
|
|
||
| ; Compare it again, but with an intermittent side-effect that blocks the optimization. | ||
| %9 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3) | ||
| %10 = extractvalue { ptr, i64, i64 } %9, 0 | ||
| %11 = extractvalue { ptr, i64, i64 } %9, 1 | ||
| ; Function call may write to the slice storage. | ||
| call fastcc void @maybeSideEffect() | ||
| call fastcc i1 @runtime.stringEqual(ptr %10, i64 %11, ptr %10, i64 %11) | ||
|
|
||
| ; Reading the string after comparing should also defeat the optimization. | ||
| %13 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3) | ||
| %14 = extractvalue { ptr, i64, i64 } %13, 0 | ||
| %15 = extractvalue { ptr, i64, i64 } %13, 1 | ||
| call fastcc i1 @runtime.stringEqual(ptr %14, i64 %15, ptr %14, i64 %15) | ||
| call fastcc void @readString(ptr %14, i64 %15) | ||
| ret void | ||
| } | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||
| target triple = "x86_64--linux" | ||
|
|
||
| @str = constant [6 x i8] c"foobar" | ||
|
|
||
| declare { ptr, i64, i64 } @runtime.stringToBytes(ptr, i64) | ||
|
|
||
| declare { ptr, i64 } @runtime.stringFromBytes(ptr, i64, i64) | ||
|
|
||
| declare i1 @runtime.stringEqual(ptr nocapture, i64, ptr nocapture, i64) | ||
|
|
||
| declare void @maybeSideEffect() | ||
|
|
||
| declare void @readString(ptr nocapture, i64) | ||
|
|
||
| define void @testReadOnly() { | ||
| entry: | ||
| %0 = call fastcc { ptr, i64, i64 } @runtime.stringToBytes(ptr @str, i64 6) | ||
| %1 = extractvalue { ptr, i64, i64 } %0, 0 | ||
| %2 = extractvalue { ptr, i64, i64 } %0, 1 | ||
| %3 = extractvalue { ptr, i64, i64 } %0, 2 | ||
| %4 = call fastcc i1 @runtime.stringEqual(ptr %1, i64 %2, ptr %1, i64 %2) | ||
| %5 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3) | ||
| %6 = extractvalue { ptr, i64, i64 } %5, 0 | ||
| call fastcc void @maybeSideEffect() | ||
| %7 = call fastcc i1 @runtime.stringEqual(ptr %6, i64 %2, ptr %6, i64 %2) | ||
| %8 = call fastcc { ptr, i64, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3) | ||
| %9 = extractvalue { ptr, i64, i64 } %8, 0 | ||
| %10 = call fastcc i1 @runtime.stringEqual(ptr %9, i64 %2, ptr %9, i64 %2) | ||
| call fastcc void @readString(ptr %9, i64 %2) | ||
| ret void | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could be generalized to any function:
memory(argmem: read),memory(read), etc (in other words, doesn't modify memory). This should be automatically deduced forruntime.stringEqualbut if not we can add it to compiler/symbol.go.nocapture(meaning the pointer parameter is not kept across the function call).For details, see: https://llvm.org/docs/LangRef.html#fnattrs and https://llvm.org/docs/LangRef.html#paramattrs.