Skip to content

Commit 4abf26e

Browse files
authored
Add relocatable root compression (#43881)
Currently we can't cache "external" CodeInstances, i.e., those generated by compiling other modules' methods with externally-defined types. For example, consider `push!([], MyPkg.MyType())`: Base owns the method `push!(::Vector{Any}, ::Any)` but doesn't know about `MyType`.

While there are several obstacles to caching external CodeInstances, the primary one is that in compressed IR, method roots are referenced from a list by index, and the index is defined by order of insertion. This order might change depending on package-loading sequence or other history-dependent factors. If the order isn't consistent, our current serialization techniques would result in corrupted code upon decompression, and that would generally trigger catastrophic failure. To avoid this problem, we simply avoid caching such CodeInstances.

This commit enables roots to be referenced with respect to a `(key, index)` pair, where `key` identifies the module and `index` numbers just those roots with the same `key`. Roots with `key = 0` are considered to be of unknown origin, and CodeInstances referencing such roots will remain unserializable unless all such roots were added at the time of system image creation. To track this additional data, this commit adds two fields to core types:

- to methods, it adds a `nroots_sysimg` field to count the number of roots defined at the time of writing the system image (such roots occur first in the list of `roots`)
- to CodeInstances, it adds a flag `relocatability` having value 1 if every root is "safe," meaning it was either added at sysimg creation or is tagged with a non-zero `key`. Even a single unsafe root will cause this to have value 0.
1 parent 11bac43 commit 4abf26e

File tree

19 files changed

+341
-33
lines changed

19 files changed

+341
-33
lines changed

base/boot.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -418,9 +418,9 @@ eval(Core, :(LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line
418418
$(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))))
419419
eval(Core, :(CodeInstance(mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
420420
@nospecialize(inferred), const_flags::Int32,
421-
min_world::UInt, max_world::UInt) =
422-
ccall(:jl_new_codeinst, Ref{CodeInstance}, (Any, Any, Any, Any, Int32, UInt, UInt),
423-
mi, rettype, inferred_const, inferred, const_flags, min_world, max_world)))
421+
min_world::UInt, max_world::UInt, relocatability::UInt8) =
422+
ccall(:jl_new_codeinst, Ref{CodeInstance}, (Any, Any, Any, Any, Int32, UInt, UInt, UInt8),
423+
mi, rettype, inferred_const, inferred, const_flags, min_world, max_world, relocatability)))
424424
eval(Core, :(Const(@nospecialize(v)) = $(Expr(:new, :Const, :v))))
425425
eval(Core, :(PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields))))
426426
eval(Core, :(PartialOpaque(@nospecialize(typ), @nospecialize(env), isva::Bool, parent::MethodInstance, source::Method) = $(Expr(:new, :PartialOpaque, :typ, :env, :isva, :parent, :source))))

base/compiler/typeinfer.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
278278
end
279279

280280
function CodeInstance(result::InferenceResult, @nospecialize(inferred_result),
281-
valid_worlds::WorldRange)
281+
valid_worlds::WorldRange, relocatability::UInt8)
282282
local const_flags::Int32
283283
result_type = result.result
284284
@assert !(result_type isa LimitedAccuracy)
@@ -310,7 +310,7 @@ function CodeInstance(result::InferenceResult, @nospecialize(inferred_result),
310310
end
311311
return CodeInstance(result.linfo,
312312
widenconst(result_type), rettype_const, inferred_result,
313-
const_flags, first(valid_worlds), last(valid_worlds))
313+
const_flags, first(valid_worlds), last(valid_worlds), relocatability)
314314
end
315315

316316
# For the NativeInterpreter, we don't need to do an actual cache query to know
@@ -384,7 +384,8 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
384384
# TODO: also don't store inferred code if we've previously decided to interpret this function
385385
if !already_inferred
386386
inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result.src)
387-
code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
387+
relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] : UInt8(0)
388+
code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds, relocatability)
388389
end
389390
unlock_mi_inference(interp, linfo)
390391
nothing

src/codegen.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7795,8 +7795,11 @@ jl_compile_result_t jl_emit_codeinst(
77957795
jl_options.debug_level > 1) {
77967796
// update the stored code
77977797
if (codeinst->inferred != (jl_value_t*)src) {
7798-
if (jl_is_method(def))
7798+
if (jl_is_method(def)) {
77997799
src = (jl_code_info_t*)jl_compress_ir(def, src);
7800+
assert(jl_typeis(src, jl_array_uint8_type));
7801+
codeinst->relocatability = ((uint8_t*)jl_array_data(src))[jl_array_len(src)-1];
7802+
}
78007803
codeinst->inferred = (jl_value_t*)src;
78017804
jl_gc_wb(codeinst, src);
78027805
}

src/common_symbols1.inc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,3 @@ jl_symbol("undef"),
9797
jl_symbol("sizeof"),
9898
jl_symbol("String"),
9999
jl_symbol("namedtuple.jl"),
100-
jl_symbol("pop"),

src/common_symbols2.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
jl_symbol("pop"),
12
jl_symbol("inbounds"),
23
jl_symbol("strings/string.jl"),
34
jl_symbol("Ref"),
@@ -251,4 +252,3 @@ jl_symbol("GitError"),
251252
jl_symbol("zeros"),
252253
jl_symbol("InexactError"),
253254
jl_symbol("LogLevel"),
254-
jl_symbol("between"),

src/dump.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -528,6 +528,7 @@ static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_
528528
jl_serialize_value(s, NULL);
529529
jl_serialize_value(s, jl_any_type);
530530
}
531+
write_uint8(s->s, codeinst->relocatability);
531532
jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque);
532533
}
533534

@@ -705,6 +706,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
705706
jl_serialize_value(s, (jl_value_t*)m->slot_syms);
706707
jl_serialize_value(s, (jl_value_t*)m->roots);
707708
jl_serialize_value(s, (jl_value_t*)m->root_blocks);
709+
write_int32(s->s, m->nroots_sysimg);
708710
jl_serialize_value(s, (jl_value_t*)m->ccallable);
709711
jl_serialize_value(s, (jl_value_t*)m->source);
710712
jl_serialize_value(s, (jl_value_t*)m->unspecialized);
@@ -1577,6 +1579,7 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
15771579
m->root_blocks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->root_blocks);
15781580
if (m->root_blocks)
15791581
jl_gc_wb(m, m->root_blocks);
1582+
m->nroots_sysimg = read_int32(s->s);
15801583
m->ccallable = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->ccallable);
15811584
if (m->ccallable) {
15821585
jl_gc_wb(m, m->ccallable);
@@ -1661,6 +1664,7 @@ static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl
16611664
codeinst->invoke = jl_fptr_const_return;
16621665
if ((flags >> 3) & 1)
16631666
codeinst->precompile = 1;
1667+
codeinst->relocatability = read_uint8(s->s);
16641668
codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next);
16651669
jl_gc_wb(codeinst, codeinst->next);
16661670
if (validate) {

src/gf.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ
206206
JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
207207
jl_method_instance_t *mi, jl_value_t *rettype,
208208
jl_value_t *inferred_const, jl_value_t *inferred,
209-
int32_t const_flags, size_t min_world, size_t max_world);
209+
int32_t const_flags, size_t min_world, size_t max_world, uint8_t relocatability);
210210
JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
211211
jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
212212

@@ -243,7 +243,7 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
243243

244244
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
245245
(jl_value_t*)jl_any_type, jl_nothing, jl_nothing,
246-
0, 1, ~(size_t)0);
246+
0, 1, ~(size_t)0, 0);
247247
jl_mi_cache_insert(mi, codeinst);
248248
codeinst->specptr.fptr1 = fptr;
249249
codeinst->invoke = jl_fptr_args;
@@ -366,15 +366,15 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
366366
}
367367
codeinst = jl_new_codeinst(
368368
mi, rettype, NULL, NULL,
369-
0, min_world, max_world);
369+
0, min_world, max_world, 0);
370370
jl_mi_cache_insert(mi, codeinst);
371371
return codeinst;
372372
}
373373

374374
JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
375375
jl_method_instance_t *mi, jl_value_t *rettype,
376376
jl_value_t *inferred_const, jl_value_t *inferred,
377-
int32_t const_flags, size_t min_world, size_t max_world
377+
int32_t const_flags, size_t min_world, size_t max_world, uint8_t relocatability
378378
/*, jl_array_t *edges, int absolute_max*/)
379379
{
380380
jl_task_t *ct = jl_current_task;
@@ -399,6 +399,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
399399
codeinst->isspecsig = 0;
400400
codeinst->precompile = 0;
401401
codeinst->next = NULL;
402+
codeinst->relocatability = relocatability;
402403
return codeinst;
403404
}
404405

@@ -2008,7 +2009,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
20082009
if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
20092010
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
20102011
(jl_value_t*)jl_any_type, NULL, NULL,
2011-
0, 1, ~(size_t)0);
2012+
0, 1, ~(size_t)0, 0);
20122013
codeinst->isspecsig = 0;
20132014
codeinst->specptr = unspec->specptr;
20142015
codeinst->rettype_const = unspec->rettype_const;
@@ -2026,7 +2027,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
20262027
if (!jl_code_requires_compiler(src)) {
20272028
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
20282029
(jl_value_t*)jl_any_type, NULL, NULL,
2029-
0, 1, ~(size_t)0);
2030+
0, 1, ~(size_t)0, 0);
20302031
codeinst->invoke = jl_fptr_interpret_call;
20312032
jl_mi_cache_insert(mi, codeinst);
20322033
record_precompile_statement(mi);
@@ -2061,7 +2062,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
20612062
return ucache;
20622063
}
20632064
codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
2064-
0, 1, ~(size_t)0);
2065+
0, 1, ~(size_t)0, 0);
20652066
codeinst->isspecsig = 0;
20662067
codeinst->specptr = ucache->specptr;
20672068
codeinst->rettype_const = ucache->rettype_const;

src/ircode.c

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,37 @@ typedef struct {
2626
// method we're compressing for
2727
jl_method_t *method;
2828
jl_ptls_t ptls;
29+
uint8_t relocatability;
2930
} jl_ircode_state;
3031

3132
// --- encoding ---
3233

3334
#define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0)
3435

35-
static int literal_val_id(jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED
36+
static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i)
37+
{
38+
if (!get_root_reference(rr, s->method, i))
39+
s->relocatability = 0;
40+
}
41+
42+
static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED
3643
{
3744
jl_array_t *rs = s->method->roots;
3845
int i, l = jl_array_len(rs);
3946
if (jl_is_symbol(v) || jl_is_concrete_type(v)) {
4047
for (i = 0; i < l; i++) {
4148
if (jl_array_ptr_ref(rs, i) == v)
42-
return i;
49+
return tagged_root(rr, s, i);
4350
}
4451
}
4552
else {
4653
for (i = 0; i < l; i++) {
4754
if (jl_egal(jl_array_ptr_ref(rs, i), v))
48-
return i;
55+
return tagged_root(rr, s, i);
4956
}
5057
}
5158
jl_add_method_root(s->method, jl_precompile_toplevel_module, v);
52-
return jl_array_len(rs) - 1;
59+
return tagged_root(rr, s, jl_array_len(rs) - 1);
5360
}
5461

5562
static void jl_encode_int32(jl_ircode_state *s, int32_t x)
@@ -67,6 +74,7 @@ static void jl_encode_int32(jl_ircode_state *s, int32_t x)
6774
static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED
6875
{
6976
size_t i;
77+
rle_reference rr;
7078

7179
if (v == NULL) {
7280
write_uint8(s->s, TAG_NULL);
@@ -321,8 +329,13 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
321329
if (!as_literal && !(jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_tuple(v) ||
322330
jl_is_linenode(v) || jl_is_upsilonnode(v) || jl_is_pinode(v) ||
323331
jl_is_slot(v) || jl_is_ssavalue(v))) {
324-
int id = literal_val_id(s, v);
332+
literal_val_id(&rr, s, v);
333+
int id = rr.index;
325334
assert(id >= 0);
335+
if (rr.key) {
336+
write_uint8(s->s, TAG_RELOC_METHODROOT);
337+
write_int64(s->s, rr.key);
338+
}
326339
if (id < 256) {
327340
write_uint8(s->s, TAG_METHODROOT);
328341
write_uint8(s->s, id);
@@ -577,6 +590,7 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
577590
assert(!ios_eof(s->s));
578591
jl_value_t *v;
579592
size_t i, n;
593+
uint64_t key;
580594
uint8_t tag = read_uint8(s->s);
581595
if (tag > LAST_TAG)
582596
return jl_deser_tag(tag);
@@ -585,10 +599,15 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
585599
case 0:
586600
tag = read_uint8(s->s);
587601
return jl_deser_tag(tag);
602+
case TAG_RELOC_METHODROOT:
603+
key = read_uint64(s->s);
604+
tag = read_uint8(s->s);
605+
assert(tag == TAG_METHODROOT || tag == TAG_LONG_METHODROOT);
606+
return lookup_root(s->method, key, tag == TAG_METHODROOT ? read_uint8(s->s) : read_uint16(s->s));
588607
case TAG_METHODROOT:
589-
return jl_array_ptr_ref(s->method->roots, read_uint8(s->s));
608+
return lookup_root(s->method, 0, read_uint8(s->s));
590609
case TAG_LONG_METHODROOT:
591-
return jl_array_ptr_ref(s->method->roots, read_uint16(s->s));
610+
return lookup_root(s->method, 0, read_uint16(s->s));
592611
case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC:
593612
return jl_decode_value_svec(s, tag);
594613
case TAG_COMMONSYM:
@@ -706,7 +725,8 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
706725
jl_ircode_state s = {
707726
&dest,
708727
m,
709-
jl_current_task->ptls
728+
jl_current_task->ptls,
729+
1
710730
};
711731

712732
jl_code_info_flags_t flags = code_info_flags(code->pure, code->propagate_inbounds, code->inlineable, code->inferred, code->constprop);
@@ -756,6 +776,8 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
756776
ios_write(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t));
757777
}
758778

779+
write_uint8(s.s, s.relocatability);
780+
759781
ios_flush(s.s);
760782
jl_array_t *v = jl_take_buffer(&dest);
761783
ios_close(s.s);
@@ -786,7 +808,8 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
786808
jl_ircode_state s = {
787809
&src,
788810
m,
789-
jl_current_task->ptls
811+
jl_current_task->ptls,
812+
1
790813
};
791814

792815
jl_code_info_t *code = jl_new_code_info_uninit();
@@ -831,6 +854,8 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
831854
ios_readall(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t));
832855
}
833856

857+
(void) read_uint8(s.s); // relocatability
858+
834859
assert(ios_getc(s.s) == -1);
835860
ios_close(s.s);
836861
JL_GC_PUSH1(&code);

src/jltypes.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2393,7 +2393,7 @@ void jl_init_types(void) JL_GC_DISABLED
23932393
jl_method_type =
23942394
jl_new_datatype(jl_symbol("Method"), core,
23952395
jl_any_type, jl_emptysvec,
2396-
jl_perm_symsvec(27,
2396+
jl_perm_symsvec(28,
23972397
"name",
23982398
"module",
23992399
"file",
@@ -2410,6 +2410,7 @@ void jl_init_types(void) JL_GC_DISABLED
24102410
"generator", // !const
24112411
"roots", // !const
24122412
"root_blocks", // !const
2413+
"nroots_sysimg",
24132414
"ccallable", // !const
24142415
"invokes", // !const
24152416
"recursion_relation", // !const
@@ -2421,7 +2422,7 @@ void jl_init_types(void) JL_GC_DISABLED
24212422
"pure",
24222423
"is_for_opaque_closure",
24232424
"constprop"),
2424-
jl_svec(27,
2425+
jl_svec(28,
24252426
jl_symbol_type,
24262427
jl_module_type,
24272428
jl_symbol_type,
@@ -2438,6 +2439,7 @@ void jl_init_types(void) JL_GC_DISABLED
24382439
jl_any_type,
24392440
jl_array_any_type,
24402441
jl_array_uint64_type,
2442+
jl_int32_type,
24412443
jl_simplevector_type,
24422444
jl_any_type,
24432445
jl_any_type,
@@ -2483,7 +2485,7 @@ void jl_init_types(void) JL_GC_DISABLED
24832485
jl_code_instance_type =
24842486
jl_new_datatype(jl_symbol("CodeInstance"), core,
24852487
jl_any_type, jl_emptysvec,
2486-
jl_perm_symsvec(11,
2488+
jl_perm_symsvec(12,
24872489
"def",
24882490
"next",
24892491
"min_world",
@@ -2493,8 +2495,9 @@ void jl_init_types(void) JL_GC_DISABLED
24932495
"inferred",
24942496
//"edges",
24952497
//"absolute_max",
2496-
"isspecsig", "precompile", "invoke", "specptr"), // function object decls
2497-
jl_svec(11,
2498+
"isspecsig", "precompile", "invoke", "specptr", // function object decls
2499+
"relocatability"),
2500+
jl_svec(12,
24982501
jl_method_instance_type,
24992502
jl_any_type,
25002503
jl_ulong_type,
@@ -2506,7 +2509,8 @@ void jl_init_types(void) JL_GC_DISABLED
25062509
//jl_bool_type,
25072510
jl_bool_type,
25082511
jl_bool_type,
2509-
jl_any_type, jl_any_type), // fptrs
2512+
jl_any_type, jl_any_type, // fptrs
2513+
jl_uint8_type),
25102514
jl_emptysvec,
25112515
0, 1, 1);
25122516
jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type);

src/julia.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ typedef struct _jl_method_t {
294294
// Identify roots by module-of-origin. We only track the module for roots added during incremental compilation.
295295
// May be NULL if no external roots have been added, otherwise it's a Vector{UInt64}
296296
jl_array_t *root_blocks; // RLE (build_id, offset) pairs (even/odd indexing)
297+
int32_t nroots_sysimg; // # of roots stored in the system image
297298
jl_svec_t *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this
298299

299300
// cache of specializations of this method for invoke(), i.e.
@@ -381,6 +382,7 @@ typedef struct _jl_code_instance_t {
381382
_Atomic(jl_fptr_sparam_t) fptr3;
382383
// 4 interpreter
383384
} specptr; // private data for `jlcall entry point
385+
uint8_t relocatability; // nonzero if all roots are built into sysimg or tagged by module key
384386
} jl_code_instance_t;
385387

386388
// all values are callable as Functions

0 commit comments

Comments
 (0)