1 | // Copyright 2021 The Go Authors. All rights reserved. |
---|---|
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | package pkgbits |
6 | |
7 | import ( |
8 | "bytes" |
9 | "crypto/md5" |
10 | "encoding/binary" |
11 | "go/constant" |
12 | "io" |
13 | "math/big" |
14 | "runtime" |
15 | ) |
16 | |
// currentVersion is the format version number that DumpTo writes as
// the first word of the export data.
//
// Version history:
//
//   - v0: initial prototype
//   - v1: adds the flags uint32 word
const currentVersion = uint32(1)
23 | |
24 | // A PkgEncoder provides methods for encoding a package's Unified IR |
25 | // export data. |
26 | type PkgEncoder struct { |
27 | // elems holds the bitstream for previously encoded elements. |
28 | elems [numRelocs][]string |
29 | |
30 | // stringsIdx maps previously encoded strings to their index within |
31 | // the RelocString section, to allow deduplication. That is, |
32 | // elems[RelocString][stringsIdx[s]] == s (if present). |
33 | stringsIdx map[string]Index |
34 | |
35 | // syncFrames is the number of frames to write at each sync |
36 | // marker. A negative value means sync markers are omitted. |
37 | syncFrames int |
38 | } |
39 | |
40 | // SyncMarkers reports whether pw uses sync markers. |
41 | func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 } |
42 | |
43 | // NewPkgEncoder returns an initialized PkgEncoder. |
44 | // |
45 | // syncFrames is the number of caller frames that should be serialized |
46 | // at Sync points. Serializing additional frames results in larger |
47 | // export data files, but can help diagnosing desync errors in |
48 | // higher-level Unified IR reader/writer code. If syncFrames is |
49 | // negative, then sync markers are omitted entirely. |
50 | func NewPkgEncoder(syncFrames int) PkgEncoder { |
51 | return PkgEncoder{ |
52 | stringsIdx: make(map[string]Index), |
53 | syncFrames: syncFrames, |
54 | } |
55 | } |
56 | |
57 | // DumpTo writes the package's encoded data to out0 and returns the |
58 | // package fingerprint. |
59 | func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) { |
60 | h := md5.New() |
61 | out := io.MultiWriter(out0, h) |
62 | |
63 | writeUint32 := func(x uint32) { |
64 | assert(binary.Write(out, binary.LittleEndian, x) == nil) |
65 | } |
66 | |
67 | writeUint32(currentVersion) |
68 | |
69 | var flags uint32 |
70 | if pw.SyncMarkers() { |
71 | flags |= flagSyncMarkers |
72 | } |
73 | writeUint32(flags) |
74 | |
75 | // Write elemEndsEnds. |
76 | var sum uint32 |
77 | for _, elems := range &pw.elems { |
78 | sum += uint32(len(elems)) |
79 | writeUint32(sum) |
80 | } |
81 | |
82 | // Write elemEnds. |
83 | sum = 0 |
84 | for _, elems := range &pw.elems { |
85 | for _, elem := range elems { |
86 | sum += uint32(len(elem)) |
87 | writeUint32(sum) |
88 | } |
89 | } |
90 | |
91 | // Write elemData. |
92 | for _, elems := range &pw.elems { |
93 | for _, elem := range elems { |
94 | _, err := io.WriteString(out, elem) |
95 | assert(err == nil) |
96 | } |
97 | } |
98 | |
99 | // Write fingerprint. |
100 | copy(fingerprint[:], h.Sum(nil)) |
101 | _, err := out0.Write(fingerprint[:]) |
102 | assert(err == nil) |
103 | |
104 | return |
105 | } |
106 | |
107 | // StringIdx adds a string value to the strings section, if not |
108 | // already present, and returns its index. |
109 | func (pw *PkgEncoder) StringIdx(s string) Index { |
110 | if idx, ok := pw.stringsIdx[s]; ok { |
111 | assert(pw.elems[RelocString][idx] == s) |
112 | return idx |
113 | } |
114 | |
115 | idx := Index(len(pw.elems[RelocString])) |
116 | pw.elems[RelocString] = append(pw.elems[RelocString], s) |
117 | pw.stringsIdx[s] = idx |
118 | return idx |
119 | } |
120 | |
121 | // NewEncoder returns an Encoder for a new element within the given |
122 | // section, and encodes the given SyncMarker as the start of the |
123 | // element bitstream. |
124 | func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder { |
125 | e := pw.NewEncoderRaw(k) |
126 | e.Sync(marker) |
127 | return e |
128 | } |
129 | |
130 | // NewEncoderRaw returns an Encoder for a new element within the given |
131 | // section. |
132 | // |
133 | // Most callers should use NewEncoder instead. |
134 | func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder { |
135 | idx := Index(len(pw.elems[k])) |
136 | pw.elems[k] = append(pw.elems[k], "") // placeholder |
137 | |
138 | return Encoder{ |
139 | p: pw, |
140 | k: k, |
141 | Idx: idx, |
142 | } |
143 | } |
144 | |
145 | // An Encoder provides methods for encoding an individual element's |
146 | // bitstream data. |
147 | type Encoder struct { |
148 | p *PkgEncoder |
149 | |
150 | Relocs []RelocEnt |
151 | RelocMap map[RelocEnt]uint32 |
152 | Data bytes.Buffer // accumulated element bitstream data |
153 | |
154 | encodingRelocHeader bool |
155 | |
156 | k RelocKind |
157 | Idx Index // index within relocation section |
158 | } |
159 | |
160 | // Flush finalizes the element's bitstream and returns its Index. |
161 | func (w *Encoder) Flush() Index { |
162 | var sb bytes.Buffer // TODO(mdempsky): strings.Builder after #44505 is resolved |
163 | |
164 | // Backup the data so we write the relocations at the front. |
165 | var tmp bytes.Buffer |
166 | io.Copy(&tmp, &w.Data) |
167 | |
168 | // TODO(mdempsky): Consider writing these out separately so they're |
169 | // easier to strip, along with function bodies, so that we can prune |
170 | // down to just the data that's relevant to go/types. |
171 | if w.encodingRelocHeader { |
172 | panic("encodingRelocHeader already true; recursive flush?") |
173 | } |
174 | w.encodingRelocHeader = true |
175 | w.Sync(SyncRelocs) |
176 | w.Len(len(w.Relocs)) |
177 | for _, rEnt := range w.Relocs { |
178 | w.Sync(SyncReloc) |
179 | w.Len(int(rEnt.Kind)) |
180 | w.Len(int(rEnt.Idx)) |
181 | } |
182 | |
183 | io.Copy(&sb, &w.Data) |
184 | io.Copy(&sb, &tmp) |
185 | w.p.elems[w.k][w.Idx] = sb.String() |
186 | |
187 | return w.Idx |
188 | } |
189 | |
190 | func (w *Encoder) checkErr(err error) { |
191 | if err != nil { |
192 | errorf("unexpected encoding error: %v", err) |
193 | } |
194 | } |
195 | |
196 | func (w *Encoder) rawUvarint(x uint64) { |
197 | var buf [binary.MaxVarintLen64]byte |
198 | n := binary.PutUvarint(buf[:], x) |
199 | _, err := w.Data.Write(buf[:n]) |
200 | w.checkErr(err) |
201 | } |
202 | |
203 | func (w *Encoder) rawVarint(x int64) { |
204 | // Zig-zag encode. |
205 | ux := uint64(x) << 1 |
206 | if x < 0 { |
207 | ux = ^ux |
208 | } |
209 | |
210 | w.rawUvarint(ux) |
211 | } |
212 | |
213 | func (w *Encoder) rawReloc(r RelocKind, idx Index) int { |
214 | e := RelocEnt{r, idx} |
215 | if w.RelocMap != nil { |
216 | if i, ok := w.RelocMap[e]; ok { |
217 | return int(i) |
218 | } |
219 | } else { |
220 | w.RelocMap = make(map[RelocEnt]uint32) |
221 | } |
222 | |
223 | i := len(w.Relocs) |
224 | w.RelocMap[e] = uint32(i) |
225 | w.Relocs = append(w.Relocs, e) |
226 | return i |
227 | } |
228 | |
229 | func (w *Encoder) Sync(m SyncMarker) { |
230 | if !w.p.SyncMarkers() { |
231 | return |
232 | } |
233 | |
234 | // Writing out stack frame string references requires working |
235 | // relocations, but writing out the relocations themselves involves |
236 | // sync markers. To prevent infinite recursion, we simply trim the |
237 | // stack frame for sync markers within the relocation header. |
238 | var frames []string |
239 | if !w.encodingRelocHeader && w.p.syncFrames > 0 { |
240 | pcs := make([]uintptr, w.p.syncFrames) |
241 | n := runtime.Callers(2, pcs) |
242 | frames = fmtFrames(pcs[:n]...) |
243 | } |
244 | |
245 | // TODO(mdempsky): Save space by writing out stack frames as a |
246 | // linked list so we can share common stack frames. |
247 | w.rawUvarint(uint64(m)) |
248 | w.rawUvarint(uint64(len(frames))) |
249 | for _, frame := range frames { |
250 | w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame)))) |
251 | } |
252 | } |
253 | |
254 | // Bool encodes and writes a bool value into the element bitstream, |
255 | // and then returns the bool value. |
256 | // |
257 | // For simple, 2-alternative encodings, the idiomatic way to call Bool |
258 | // is something like: |
259 | // |
260 | // if w.Bool(x != 0) { |
261 | // // alternative #1 |
262 | // } else { |
263 | // // alternative #2 |
264 | // } |
265 | // |
266 | // For multi-alternative encodings, use Code instead. |
267 | func (w *Encoder) Bool(b bool) bool { |
268 | w.Sync(SyncBool) |
269 | var x byte |
270 | if b { |
271 | x = 1 |
272 | } |
273 | err := w.Data.WriteByte(x) |
274 | w.checkErr(err) |
275 | return b |
276 | } |
277 | |
278 | // Int64 encodes and writes an int64 value into the element bitstream. |
279 | func (w *Encoder) Int64(x int64) { |
280 | w.Sync(SyncInt64) |
281 | w.rawVarint(x) |
282 | } |
283 | |
284 | // Uint64 encodes and writes a uint64 value into the element bitstream. |
285 | func (w *Encoder) Uint64(x uint64) { |
286 | w.Sync(SyncUint64) |
287 | w.rawUvarint(x) |
288 | } |
289 | |
290 | // Len encodes and writes a non-negative int value into the element bitstream. |
291 | func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) } |
292 | |
293 | // Int encodes and writes an int value into the element bitstream. |
294 | func (w *Encoder) Int(x int) { w.Int64(int64(x)) } |
295 | |
296 | // Len encodes and writes a uint value into the element bitstream. |
297 | func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) } |
298 | |
299 | // Reloc encodes and writes a relocation for the given (section, |
300 | // index) pair into the element bitstream. |
301 | // |
302 | // Note: Only the index is formally written into the element |
303 | // bitstream, so bitstream decoders must know from context which |
304 | // section an encoded relocation refers to. |
305 | func (w *Encoder) Reloc(r RelocKind, idx Index) { |
306 | w.Sync(SyncUseReloc) |
307 | w.Len(w.rawReloc(r, idx)) |
308 | } |
309 | |
310 | // Code encodes and writes a Code value into the element bitstream. |
311 | func (w *Encoder) Code(c Code) { |
312 | w.Sync(c.Marker()) |
313 | w.Len(c.Value()) |
314 | } |
315 | |
316 | // String encodes and writes a string value into the element |
317 | // bitstream. |
318 | // |
319 | // Internally, strings are deduplicated by adding them to the strings |
320 | // section (if not already present), and then writing a relocation |
321 | // into the element bitstream. |
322 | func (w *Encoder) String(s string) { |
323 | w.Sync(SyncString) |
324 | w.Reloc(RelocString, w.p.StringIdx(s)) |
325 | } |
326 | |
327 | // Strings encodes and writes a variable-length slice of strings into |
328 | // the element bitstream. |
329 | func (w *Encoder) Strings(ss []string) { |
330 | w.Len(len(ss)) |
331 | for _, s := range ss { |
332 | w.String(s) |
333 | } |
334 | } |
335 | |
336 | // Value encodes and writes a constant.Value into the element |
337 | // bitstream. |
338 | func (w *Encoder) Value(val constant.Value) { |
339 | w.Sync(SyncValue) |
340 | if w.Bool(val.Kind() == constant.Complex) { |
341 | w.scalar(constant.Real(val)) |
342 | w.scalar(constant.Imag(val)) |
343 | } else { |
344 | w.scalar(val) |
345 | } |
346 | } |
347 | |
348 | func (w *Encoder) scalar(val constant.Value) { |
349 | switch v := constant.Val(val).(type) { |
350 | default: |
351 | errorf("unhandled %v (%v)", val, val.Kind()) |
352 | case bool: |
353 | w.Code(ValBool) |
354 | w.Bool(v) |
355 | case string: |
356 | w.Code(ValString) |
357 | w.String(v) |
358 | case int64: |
359 | w.Code(ValInt64) |
360 | w.Int64(v) |
361 | case *big.Int: |
362 | w.Code(ValBigInt) |
363 | w.bigInt(v) |
364 | case *big.Rat: |
365 | w.Code(ValBigRat) |
366 | w.bigInt(v.Num()) |
367 | w.bigInt(v.Denom()) |
368 | case *big.Float: |
369 | w.Code(ValBigFloat) |
370 | w.bigFloat(v) |
371 | } |
372 | } |
373 | |
374 | func (w *Encoder) bigInt(v *big.Int) { |
375 | b := v.Bytes() |
376 | w.String(string(b)) // TODO: More efficient encoding. |
377 | w.Bool(v.Sign() < 0) |
378 | } |
379 | |
380 | func (w *Encoder) bigFloat(v *big.Float) { |
381 | b := v.Append(nil, 'p', -1) |
382 | w.String(string(b)) // TODO: More efficient encoding. |
383 | } |
384 |
Members