Skip to content

Commit 5a71fb1

Browse files
authored
adds c layouts (#1524)
also, fixes c.data computation to include the trailing padding.
1 parent 4d019ce commit 5a71fb1

File tree

6 files changed

+256
-71
lines changed

6 files changed

+256
-71
lines changed

lib/bap_c/bap_c_abi.ml

Lines changed: 88 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ open Monads.Std
66
include Self()
77

88
module Attrs = Bap_c_term_attributes
9-
9+
module Data = Bap_c_data
1010
type ctype = t
1111

1212
let is_const p = p.Spec.qualifier.Qualifier.const
@@ -40,7 +40,7 @@ type error = [
4040
] [@@deriving sexp_of]
4141

4242
let sexp_of_exp exp = Sexp.Atom (Exp.to_string exp)
43-
type param = Bap_c_data.t * exp [@@deriving sexp]
43+
type param = Data.t * exp [@@deriving sexp]
4444

4545
type args = {
4646
return : param option;
@@ -58,33 +58,96 @@ exception Failed of error [@@deriving sexp_of]
5858
let fail x = raise (Failed x)
5959

6060
let data (size : #Bap_c_size.base) (t : Bap_c_type.t) =
61-
let open Bap_c_data in
61+
let open Data in
62+
let sizeof t = match size#bits t with
63+
| None -> Size.in_bits size#pointer
64+
| Some s -> s in
65+
let padding pad : Data.t =
66+
match Size.of_int_opt pad with
67+
| Some pad -> Imm (pad,Set [])
68+
| None ->
69+
let data : Data.t = Imm (`r8,Set []) in
70+
Seq (List.init (pad/8) ~f:(Fn.const data)) in
6271
let rec data = function
6372
| `Void -> Seq []
6473
| `Basic {Spec.t} -> Imm (size#basic t, Top)
6574
| `Pointer {Spec.t} -> Ptr (data t)
6675
| `Array {Spec.t={Array.element=t; size=None}} -> Ptr (data t)
6776
| `Array {Spec.t={Array.element=t; size=Some n}} ->
68-
let et = data t in
69-
Ptr (Seq (List.init n ~f:(fun _ -> et)))
77+
Ptr (Seq (List.init n ~f:(Fn.const (data t))))
7078
| `Structure {Spec.t={Compound.fields=fs}} ->
71-
let _,ss =
72-
List.fold fs ~init:(0,[]) ~f:(fun (off,seq) (_,t) ->
73-
let off' = match size#bits t with
74-
| None -> off + Size.in_bits size#pointer (* or assert false *)
75-
| Some sz -> off + sz in
76-
match size#padding t off with
77-
| None -> off', data t :: seq
78-
| Some pad -> off, data t :: Imm (pad,Set []) :: seq) in
79+
List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (_,t) ->
80+
let fsize = sizeof t in
81+
let pad = Bap_c_size.padding (size#alignment t) off in
82+
off + fsize + pad, total + fsize + pad, match pad with
83+
| 0 -> data t :: seq
84+
| _ -> data t :: padding pad :: seq) |> fun (_,total,ss) ->
85+
let fullsize = sizeof t in
86+
let pad = max 0 (fullsize - total) in
87+
let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in
7988
Seq (List.rev ss)
80-
| `Union {Spec.t=_} ->
81-
let sz = match size#bits t with
82-
| None -> Size.in_bits size#pointer
83-
| Some sz -> sz in
84-
Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Set [])))
89+
| `Union _ ->
90+
let sz = sizeof t in
91+
Seq (List.init (sz/8) ~f:(fun _ -> Imm (`r8,Top)))
8592
| `Function _ -> Ptr (Imm ((size#pointer :> size),Top)) in
8693
data t
8794

95+
let layout (size : #Bap_c_size.base) (t : Bap_c_type.t) =
96+
let open Data in
97+
let sizeof t = match size#bits t with
98+
| None -> Size.in_bits size#pointer
99+
| Some s -> s in
100+
let imm size obj : Data.layout = {layout=Imm(size,obj)}
101+
and ptr {layout=data} : Data.layout = {layout=Ptr data}
102+
and seq layouts : Data.layout = {
103+
layout = Seq (List.map layouts ~f:(fun {layout} -> layout))
104+
} in
105+
let padding pad : Data.layout = imm pad Undef in
106+
let rec layout t : Data.layout = match t with
107+
| `Void -> imm 8 Undef
108+
| `Basic {Spec.t} -> imm (Size.in_bits (size#basic t)) (Basic t)
109+
| `Pointer {Spec.t} -> ptr (layout t)
110+
| `Array {Spec.t={Array.element=t; size=None}} -> ptr (layout t)
111+
| `Array {Spec.t={Array.element=t; size=Some n}} ->
112+
ptr (seq (List.init n ~f:(Fn.const (layout t))))
113+
| `Structure {Spec.t={Compound.fields=fs}} ->
114+
List.fold fs ~init:(0,0,[]) ~f:(fun (off,total,seq) (name,t) ->
115+
let fsize = sizeof t in
116+
let pad = Bap_c_size.padding (size#alignment t) off in
117+
off + fsize + pad, total + fsize + pad,
118+
imm fsize (Field (name,layout t)) ::
119+
match pad with
120+
| 0 -> seq
121+
| _ -> padding pad :: seq) |> fun (_,total,ss) ->
122+
let fullsize = sizeof t in
123+
let pad = max 0 (fullsize - total) in
124+
let ss = if pad = 0 then ss else padding (fullsize-total) :: ss in
125+
seq (List.rev ss)
126+
| `Union {Spec.t={Compound.fields=fs}} ->
127+
let total = sizeof t in
128+
let variants = List.map fs ~f:(fun (name,t) ->
129+
let fsize = sizeof t in
130+
let pad = max 0 (total - fsize) in
131+
let field = imm fsize @@ Field (name, layout t) in
132+
match pad with
133+
| 0 -> field
134+
| _ -> seq [field; padding pad]) in
135+
imm total (Union variants)
136+
| `Function _ -> ptr (imm (Size.in_bits (size#pointer)) Undef) in
137+
layout t
138+
139+
let rec size_of_data size : Data.t -> int = function
140+
| Imm (size,_) -> Size.in_bits size
141+
| Seq xs -> List.sum (module Int) ~f:(size_of_data size) xs
142+
| Ptr _ -> Size.in_bits (size#pointer)
143+
144+
let rec size_of_layout size : Data.layout -> int =
145+
fun {layout} -> size_of_datum size layout
146+
and size_of_datum size : _ Data.datum -> int = function
147+
| Imm (size,_) -> size
148+
| Seq xs -> List.sum (module Int) ~f:(size_of_datum size) xs
149+
| Ptr _ -> Size.in_bits (size#pointer)
150+
88151
let array_to_pointer (t : ctype) : ctype =
89152
match t with
90153
| `Array ({t={element}} as s) -> `Pointer {s with t = element}
@@ -109,12 +172,18 @@ let create_arg size i intent name t (data,exp) sub =
109172
let ltyp = match size#bits t with
110173
| None -> Type.imm (Size.in_bits size#pointer)
111174
| Some m -> Type.imm m in
175+
let layout = match data with
176+
| Data.Ptr _ ->
177+
if Bap_c_type.is_pointer t then layout size t
178+
else layout size (Bap_c_type.pointer t)
179+
| _ -> layout size t in
112180
let rtyp = Type.infer_exn exp in
113181
let name = if String.is_empty name then sprintf "arg%d" (i+1) else name in
114182
let var = Var.create (Sub.name sub ^ "_" ^ name) ltyp in
115183
let arg = Arg.create ~intent var @@ coerce ltyp rtyp exp in
116184
let arg = Term.set_attr arg Attrs.data data in
117185
let arg = Term.set_attr arg Attrs.t t in
186+
let arg = Term.set_attr arg Attrs.layout layout in
118187
arg
119188

120189
let registry = Hashtbl.create (module String)
@@ -242,7 +311,7 @@ module Arg = struct
242311
module C = struct
243312
module Size = Bap_c_size
244313
module Type = Bap_c_type
245-
module Data = Bap_c_data
314+
module Data = Data
246315
end
247316

248317
module Stack : sig

lib/bap_c/bap_c_abi.mli

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,19 @@ val create_api_processor : #Bap_c_size.base -> t -> Bap_api.t
7171

7272
(** [data size t] creates an abstraction of data that is represented
7373
by type [t]. The [size] parameter defines a data model, e.g.,
74-
sizes of primitive types, padding and alignment restrictions, etc.*)
74+
sizes of primitive types, padding and alignment restrictions, etc.
75+
76+
The abstraction includes inner and trailing paddings, when
77+
necessary. *)
7578
val data : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.t
7679

80+
81+
(** [layout size t] computes the c data type layout.
82+
83+
@since 2.5.0 *)
84+
val layout : #Bap_c_size.base -> Bap_c_type.t -> Bap_c_data.layout
85+
86+
7787
(** [arg_intent t] infers argument intention based on its C type. If
7888
an argument is passed by value, i.e., it is a c basic type, then
7989
it is an input argument. If an argument is a reference, but not a

lib/bap_c/bap_c_data.ml

Lines changed: 48 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,73 @@
1-
(** C Data model.
2-
3-
This module defines abstractions for C values.
4-
5-
A value is backed by a datum - a sequence of bits that represents
6-
the value. This module also defines models for integer
7-
representation.
8-
*)
91
open Core_kernel[@@warning "-D"]
102
open Bap.Std
3+
open Format
114

12-
(** models for 32 bit systems *)
135
type model32 = [
146
| `LP32
157
| `ILP32
168
]
179

18-
(** models for 64 bit systems *)
1910
type model64 = [
2011
| `ILP64
2112
| `LLP64
2213
| `LP64
2314
]
2415

25-
26-
(** The following table summarize all models of integer
27-
representation.
28-
29-
{v
30-
LP32 ILP32 ILP64 LLP64 LP64
31-
char 8 8 8 8 8
32-
short 16 16 16 16 16
33-
int 16 32 64 32 32
34-
long 32 32 64 32 64
35-
addr 32 32 64 64 64
36-
v}
37-
*)
3816
type model = [model32 | model64]
39-
40-
(** Abstract value lattice. The lattice is complete, and
41-
[Set []] is the supremum, i.e., the bot.*)
4217
type value =
4318
| Top
44-
(** any possible value *)
4519
| Set of word list
46-
(** one of the specified *)
4720
[@@deriving bin_io, compare, sexp]
4821

22+
type 'd obj =
23+
| Basic of Bap_c_type.basic
24+
| Field of (string * 'd)
25+
| Undef
26+
| Union of 'd list
27+
[@@deriving bin_io, compare, sexp]
4928

50-
(** abstraction of a С datum.
29+
type ('d,'s) datum =
30+
| Imm of 's * 'd
31+
| Seq of ('d,'s) datum list
32+
| Ptr of ('d,'s) datum
33+
[@@deriving bin_io, compare, sexp]
5134

52-
The datum is a sequence of bits that represenst a particular C
53-
value. We abstract datum as either an immediate value of the given
54-
size and value lattice, or a sequence of data, or a pointer to a
55-
datum.*)
56-
type t =
57-
| Imm of Size.t * value
58-
(** [Imm (size,value)] *)
59-
| Seq of t list
60-
(** [Seq (t1,..,tN)] *)
61-
| Ptr of t
62-
(** [Ptr (type,size)] *)
35+
type layout = {layout : (layout obj,int) datum}
6336
[@@deriving bin_io, compare, sexp]
6437

38+
type t = (value,Size.t) datum
39+
[@@deriving bin_io, compare, sexp]
40+
41+
let pp_value ppf = function
42+
| Top -> fprintf ppf "Top"
43+
| Set xs -> fprintf ppf "%a" (Seq.pp Word.pp) (Seq.of_list xs)
44+
let rec pp ppf = function
45+
| Imm (sz,v) -> fprintf ppf "%a:%a" pp_value v Size.pp sz
46+
| Seq ts -> fprintf ppf "%a" (Seq.pp pp) (Seq.of_list ts)
47+
| Ptr t -> fprintf ppf "%a ptr" pp t
48+
6549

66-
(** *)
50+
let rec pp_layout ppf : layout -> unit = fun {layout=datum} ->
51+
pp_datum ppf datum
52+
and pp_datum ppf : (layout obj, int) datum -> unit = function
53+
| Imm (sz,v) ->
54+
fprintf ppf "@[<2>[%a : %d]@]" pp_obj v sz
55+
| Seq objs ->
56+
fprintf ppf "@[@[<hv2>{@ ";
57+
pp_print_list ~pp_sep:(fun ppf () ->
58+
fprintf ppf ",@ ")
59+
pp_datum ppf objs;
60+
fprintf ppf "@]@;}@]"
61+
| Ptr t ->
62+
fprintf ppf "*%a" pp_datum t
63+
and pp_obj ppf : layout obj -> unit = function
64+
| Basic t -> Bap_c_type.(pp ppf (basic t))
65+
| Field (name,layout) ->
66+
fprintf ppf "@[<2><%s : %a>@]" name pp_layout layout
67+
| Undef ->
68+
fprintf ppf "<undef>"
69+
| Union xs ->
70+
fprintf ppf "@[<hv>";
71+
pp_print_list ~pp_sep:(fun ppf () -> fprintf ppf "@;| ")
72+
pp_layout ppf xs;
73+
fprintf ppf "@]"

lib/bap_c/bap_c_data.mli

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
(** C Data model.
2+
3+
This module defines abstractions for C values.
4+
5+
A value is backed by a datum - a sequence of bits that represents
6+
the value. This module also defines models for integer
7+
representation.
8+
*)
9+
open Core_kernel[@@warning "-D"]
10+
open Bap.Std
11+
12+
(** models for 32 bit systems *)
13+
type model32 = [
14+
| `LP32
15+
| `ILP32
16+
]
17+
18+
(** models for 64 bit systems *)
19+
type model64 = [
20+
| `ILP64
21+
| `LLP64
22+
| `LP64
23+
]
24+
25+
26+
(** The following table summarize all models of integer
27+
representation.
28+
29+
{v
30+
LP32 ILP32 ILP64 LLP64 LP64
31+
char 8 8 8 8 8
32+
short 16 16 16 16 16
33+
int 16 32 64 32 32
34+
long 32 32 64 32 64
35+
addr 32 32 64 64 64
36+
v}
37+
*)
38+
type model = [model32 | model64]
39+
40+
(** A value lattice.*)
41+
type value =
42+
| Top (** any possible value *)
43+
| Set of word list (** one of the specified, [Set []] is bot *)
44+
[@@deriving bin_io, compare, sexp]
45+
46+
47+
(** A C Object representation.
48+
49+
The type is parameterized with the object layout representation to
50+
enable the recursive definition of the generalized layout type.
51+
52+
@since 2.5.0 *)
53+
type 'd obj =
54+
| Basic of Bap_c_type.basic (** A value of a basic type *)
55+
| Field of (string * 'd) (** A struct or union field *)
56+
| Undef (** Undefined data (padding or code) *)
57+
| Union of 'd list (** Union of values *)
58+
[@@deriving bin_io, compare, sexp]
59+
60+
(** abstraction of a С datum.
61+
62+
The datum is a sequence of bits that represents a C object. We
63+
abstract datum as either an immediate value of the given size,
64+
or a sequence of data, or a pointer to a datum.
65+
66+
@since 2.5.0
67+
*)
68+
type ('d,'s) datum =
69+
| Imm of 's * 'd (** [Imm (size, value)] *)
70+
| Seq of ('d,'s) datum list (** [Seq [t1; ... ;tN]] *)
71+
| Ptr of ('d,'s) datum (** [Ptr datum] *)
72+
[@@deriving bin_io, compare, sexp]
73+
74+
75+
(** Describes C object's layout. *)
76+
type layout = {layout : (layout obj,int) datum}
77+
[@@deriving bin_io, compare, sexp]
78+
79+
80+
(** The datum that uses value lattice for object representation. *)
81+
type t = (value,Size.t) datum
82+
[@@deriving bin_io, compare, sexp]
83+
84+
85+
(** [pp ppf datum] prints the datum in a human-readable form.
86+
@since 2.5.0 *)
87+
val pp : Format.formatter -> t -> unit
88+
89+
90+
(** [pp_layout ppf layout] outputs layout in a human-readable form.
91+
@since 2.5.0 *)
92+
val pp_layout : Format.formatter -> layout -> unit

0 commit comments

Comments
 (0)