diff --git a/transform/optimizer.go b/transform/optimizer.go
index 54f9762bc4..ed57e2e547 100644
--- a/transform/optimizer.go
+++ b/transform/optimizer.go
@@ -65,6 +65,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config) []error {
 
 	// Run TinyGo-specific optimization passes.
 	OptimizeStringToBytes(mod)
+	OptimizeStringFromBytes(mod)
 	OptimizeReflectImplements(mod)
 	maxStackSize := config.MaxStackAlloc()
 	OptimizeAllocs(mod, nil, maxStackSize, nil)
@@ -91,6 +92,7 @@ func Optimize(mod llvm.Module, config *compileopts.Config) []error {
 			fmt.Fprintln(os.Stderr, pos.String()+": "+msg)
 		})
 		OptimizeStringToBytes(mod)
+		OptimizeStringFromBytes(mod)
 		OptimizeStringEqual(mod)
 
 	} else {
diff --git a/transform/rtcalls.go b/transform/rtcalls.go
index 3abc1d3952..0017e1212e 100644
--- a/transform/rtcalls.go
+++ b/transform/rtcalls.go
@@ -178,3 +178,108 @@ func OptimizeReflectImplements(mod llvm.Module) {
 		call.EraseFromParentAsInstruction()
 	}
 }
+
+// OptimizeStringFromBytes removes allocations for byte slice equality
+// checks that use temporary strings. In particular, `bytes.Equal` allocates
+// two such strings:
+//
+//	func Equal(a, b []byte) bool {
+//		return string(a) == string(b)
+//	}
+func OptimizeStringFromBytes(mod llvm.Module) {
+	stringFromBytes := mod.NamedFunction("runtime.stringFromBytes")
+	if stringFromBytes.IsNil() {
+		return
+	}
+	stringEqual := mod.NamedFunction("runtime.stringEqual")
+	if stringEqual.IsNil() {
+		return
+	}
+
+uses:
+	for _, call := range getUses(stringFromBytes) {
+		sliceptr := call.Operand(0)
+		slicelen := call.Operand(1)
+		// Collect all uses of the string pointer while replacing
+		// uses of the string length.
+		uses := make(map[llvm.Value]bool)
+		if !collectStringFromBytesUses(uses, slicelen, call) {
+			continue
+		}
+		inst := call
+		found := 0
+		// Scan instructions that follow the stringFromBytes call to
+		// account for all uses. Bail if any instruction may mutate the
+		// slice storage.
+		for len(uses) > found {
+			inst = llvm.NextInstruction(inst)
+			if inst.IsNil() {
+				// There are uses beyond this basic block.
+				continue uses
+			}
+			switch {
+			case !inst.IsACallInst().IsNil():
+				if inst.CalledValue() != stringEqual {
+					// The called function is not runtime.stringEqual
+					// and may mutate the slice.
+					continue uses
+				}
+			case !inst.IsAGetElementPtrInst().IsNil(),
+				!inst.IsALoadInst().IsNil(),
+				!inst.IsAExtractValueInst().IsNil():
+				// Read-only instructions.
+			default:
+				// Instruction may perform a store on the slice.
+				continue uses
+			}
+			if _, ok := uses[inst]; ok {
+				found++
+			}
+		}
+		// At this point, all instructions between the stringFromBytes call
+		// and its uses are known not to mutate the slice storage. Replace
+		// all string pointer uses with the slice pointer and get rid of
+		// the call.
+		for use, repl := range uses {
+			if repl {
+				use.ReplaceAllUsesWith(sliceptr)
+				use.EraseFromParentAsInstruction()
+			}
+		}
+		call.EraseFromParentAsInstruction()
+	}
+}
+
+// collectStringFromBytesUses collects the string pointer uses, while replacing string
+// length uses with the equivalent slice length.
+func collectStringFromBytesUses(uses map[llvm.Value]bool, slicelen, v llvm.Value) bool {
+	if v.IsNil() {
+		return true
+	}
+	for _, use := range getUses(v) {
+		switch {
+		case !use.IsAExtractValueInst().IsNil():
+			switch use.Type().TypeKind() {
+			case llvm.IntegerTypeKind:
+				// String length can always safely be replaced with slice length.
+				use.ReplaceAllUsesWith(slicelen)
+				use.EraseFromParentAsInstruction()
+			case llvm.PointerTypeKind:
+				if !collectStringFromBytesUses(uses, slicelen, use) {
+					return false
+				}
+				// Record the use as replaceable with the slice pointer.
+				uses[use] = true
+			default:
+				return false
+			}
+		case !use.IsACallInst().IsNil():
+			// Record the use, but don't replace it.
+			uses[use] = false
+		default:
+			// Give up.
+			return false
+		}
+	}
+	return true
+}
diff --git a/transform/rtcalls_test.go b/transform/rtcalls_test.go
index 9073b0ea5b..8c0c33e117 100644
--- a/transform/rtcalls_test.go
+++ b/transform/rtcalls_test.go
@@ -30,3 +30,11 @@ func TestOptimizeReflectImplements(t *testing.T) {
 		transform.OptimizeReflectImplements(mod)
 	})
 }
+
+func TestOptimizeStringFromBytes(t *testing.T) {
+	t.Parallel()
+	testTransform(t, "testdata/stringfrombytes", func(mod llvm.Module) {
+		// Run optimization pass.
+		transform.OptimizeStringFromBytes(mod)
+	})
+}
diff --git a/transform/testdata/stringfrombytes.ll b/transform/testdata/stringfrombytes.ll
new file mode 100644
index 0000000000..10eb8d4392
--- /dev/null
+++ b/transform/testdata/stringfrombytes.ll
@@ -0,0 +1,47 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--linux"
+
+@str = constant [6 x i8] c"foobar"
+
+declare { ptr, i64, i64 } @runtime.stringToBytes(ptr, i64)
+
+declare { ptr, i64 } @runtime.stringFromBytes(ptr, i64, i64)
+
+declare i1 @runtime.stringEqual(ptr nocapture, i64, ptr nocapture, i64)
+
+declare void @maybeSideEffect()
+
+declare void @readString(ptr nocapture, i64)
+
+define void @testReadOnly() {
+entry:
+  ; Build byte slice.
+  %0 = call fastcc { ptr, i64, i64 } @runtime.stringToBytes(ptr @str, i64 6)
+  %1 = extractvalue { ptr, i64, i64 } %0, 0
+  %2 = extractvalue { ptr, i64, i64 } %0, 1
+  %3 = extractvalue { ptr, i64, i64 } %0, 2
+
+  ; Test that a side-effect-free string equality check allows the stringFromBytes
+  ; call to be optimized away.
+  %4 = call fastcc { ptr, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
+  %5 = extractvalue { ptr, i64 } %4, 0
+  %6 = extractvalue { ptr, i64 } %4, 1
+  call fastcc i1 @runtime.stringEqual(ptr %5, i64 %6, ptr %5, i64 %6)
+
+  ; Compare it again, but with an intervening side effect that blocks the optimization.
+  %8 = call fastcc { ptr, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
+  %9 = extractvalue { ptr, i64 } %8, 0
+  %10 = extractvalue { ptr, i64 } %8, 1
+  ; Function call may write to the slice storage.
+  call fastcc void @maybeSideEffect()
+  call fastcc i1 @runtime.stringEqual(ptr %9, i64 %10, ptr %9, i64 %10)
+
+  ; Reading the string after comparing should also defeat the optimization.
+  %12 = call fastcc { ptr, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
+  %13 = extractvalue { ptr, i64 } %12, 0
+  %14 = extractvalue { ptr, i64 } %12, 1
+  call fastcc i1 @runtime.stringEqual(ptr %13, i64 %14, ptr %13, i64 %14)
+  call fastcc void @readString(ptr %13, i64 %14)
+  ret void
+}
+
diff --git a/transform/testdata/stringfrombytes.out.ll b/transform/testdata/stringfrombytes.out.ll
new file mode 100644
index 0000000000..3cdca7fa03
--- /dev/null
+++ b/transform/testdata/stringfrombytes.out.ll
@@ -0,0 +1,32 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64--linux"
+
+@str = constant [6 x i8] c"foobar"
+
+declare { ptr, i64, i64 } @runtime.stringToBytes(ptr, i64)
+
+declare { ptr, i64 } @runtime.stringFromBytes(ptr, i64, i64)
+
+declare i1 @runtime.stringEqual(ptr nocapture, i64, ptr nocapture, i64)
+
+declare void @maybeSideEffect()
+
+declare void @readString(ptr nocapture, i64)
+
+define void @testReadOnly() {
+entry:
+  %0 = call fastcc { ptr, i64, i64 } @runtime.stringToBytes(ptr @str, i64 6)
+  %1 = extractvalue { ptr, i64, i64 } %0, 0
+  %2 = extractvalue { ptr, i64, i64 } %0, 1
+  %3 = extractvalue { ptr, i64, i64 } %0, 2
+  %4 = call fastcc i1 @runtime.stringEqual(ptr %1, i64 %2, ptr %1, i64 %2)
+  %5 = call fastcc { ptr, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
+  %6 = extractvalue { ptr, i64 } %5, 0
+  call fastcc void @maybeSideEffect()
+  %7 = call fastcc i1 @runtime.stringEqual(ptr %6, i64 %2, ptr %6, i64 %2)
+  %8 = call fastcc { ptr, i64 } @runtime.stringFromBytes(ptr %1, i64 %2, i64 %3)
+  %9 = extractvalue { ptr, i64 } %8, 0
+  %10 = call fastcc i1 @runtime.stringEqual(ptr %9, i64 %2, ptr %9, i64 %2)
+  call fastcc void @readString(ptr %9, i64 %2)
+  ret void
+}