-
Notifications
You must be signed in to change notification settings - Fork 18
/
serialize.lisp
1516 lines (1444 loc) · 76.6 KB
/
serialize.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; Copyright 2012-2020 Google LLC
;;;
;;; Use of this source code is governed by an MIT-style
;;; license that can be found in the LICENSE file or at
;;; https://opensource.org/licenses/MIT.
(in-package #:cl-protobufs.implementation)
;;; Protobuf serialization from Lisp objects
;;; When the optimize speed option is used we avoid using DEFMETHOD, which generates
;;; generic functions that are costly to lookup at runtime. Instead, we define
;;; the "methods" as functions that are attached to the symbol naming the class,
;;; so we can easily locate them using GET at runtime.
;;; In SBCL, generalized function names are used instead.
#+sbcl
(eval-when (:compile-toplevel :load-toplevel :execute)
(sb-int:define-function-name-syntax :protobuf (name)
(and (consp (cdr name)) (consp (cddr name)) (not (cdddr name))
;; Imo these should be :marshall :unmarshall
;; since Serialize is such an overloaded term.
(member (second name) '(:serialize :deserialize))
(symbolp (third name))
(values t (second name)))))
(defun def-pseudo-method (method-name meta-message args body)
(let ((name (etypecase meta-message
(symbol meta-message)
(message-descriptor (proto-class meta-message)))))
#+sbcl `(defun (:protobuf ,method-name ,name) ,args ,@body)
#-sbcl `(setf (get ',name ',method-name) (lambda ,args ,@body))))
(defun call-pseudo-method (method-name meta-message &rest args)
(let ((class (proto-class meta-message)))
#+sbcl
`(funcall #'(:protobuf ,method-name ,class) ,@args)
#-sbcl
`(let ((method (get ',class ',method-name)))
(assert method)
(funcall (the function method) ,@args))))
;; Within a custom serializer/deserializer "method", in SBCL it is faster to call
;; another custom "method" via the function name syntax
;; (funcall #'(:protobuf {:serialize|:deserialize} type-name) ...)
;; than it is to make the same call using (funcall (get ...)) as is done in
;; the platform-agnostic code.
;; However within the generic serializer/deserializer we unfortunately have to
;; resort to using the globaldb to ask for the fast method, which is actually
;; slower than GET. Easy come, easy go.
(defun-inline custom-serializer (type)
(the (or null function)
#+sbcl (let ((name `(:protobuf :serialize ,type)))
(if (fboundp name) (fdefinition name)))
#-sbcl (get type :serialize)))
(defun-inline custom-deserializer (type)
(the (or null function)
#+sbcl(let ((name `(:protobuf :deserialize ,type)))
(if (fboundp name) (fdefinition name)))
#-sbcl(get type :deserialize)))
;;; Serialization
(defun serialize-to-stream (object stream &optional (type (type-of object)))
"Serialize OBJECT of type TYPE onto the STREAM using wire format.
OBJECT and TYPE are as described in SERIALIZE-TO-BYTES."
(let ((buffer (serialize-to-bytes object type)))
;; Todo: serialization to a stream can skip the compactification step.
;; Instead use CALL-WITH-EACH-CHUNK on the uncompactified buffer
;; which will iterate over ranges of octets that contain no intervening
;; deletion markers.
(write-sequence buffer stream)
buffer))
(defun serialize-to-bytes (object &optional (type (type-of object)))
"Serializes OBJECT into a new vector of (unsigned-byte 8) using wire format.
TYPE is a symbol naming a protobuf descriptor class."
(or (and (slot-exists-p object '%%bytes)
(proto-%%bytes object))
(let ((fast-function
#-sbcl (get type :serialize)
#+sbcl (when (fboundp `(:protobuf :serialize ,type))
(fdefinition `(:protobuf :serialize ,type))))
(b (make-octet-buffer 100)))
(if fast-function
(funcall (the function fast-function) object b)
(serialize-message object (find-message-descriptor type) b))
(let ((compact-buf (compactify-blocks b)))
(concatenate-blocks compact-buf)))))
;; Serialize the object using the given protobuf type
(defun-inline emit-skipped-bytes (msg buffer)
"If MSG has any bytes that were 'skipped' when it was deserialized (i.e.,
because it had unrecognized fields) output them to BUFFER. This effectively
passes them through to downstream consumers. Returns the number of bytes
added to BUFFER."
(declare (buffer buffer))
(if (and (message-p msg)
(message-%%skipped-bytes msg))
(let ((skipped-bytes (message-%%skipped-bytes msg)))
(buffer-ensure-space buffer (length skipped-bytes))
(fast-octets-out buffer skipped-bytes))
0))
;; The default function uses metadata from the message descriptor.
(defun serialize-message (object msg-desc buffer)
"Serialize OBJECT with message descriptor MSG-DESC into BUFFER using wire format.
The value returned is the number of octets written to BUFFER."
(declare (buffer buffer)
(message-descriptor msg-desc))
;; Check for the %%BYTES slot, since groups do not have this slot.
(let ((size 0))
(dolist (field (proto-fields msg-desc))
(iincf size (emit-field object field buffer)))
(dolist (oneof (proto-oneofs msg-desc) size)
(let* ((fields (oneof-descriptor-fields oneof))
(data (slot-value object (oneof-descriptor-internal-name oneof)))
(set-field (oneof-set-field data))
(value (oneof-value data)))
(when set-field
(let* ((field (aref fields set-field))
(type (proto-class field))
(field-num (proto-index field))
(kind (proto-kind field)))
(iincf size
(emit-non-repeated-field value type field-num kind buffer))))))
(incf size (emit-skipped-bytes object buffer))))
(defun emit-field (object field buffer)
"Serialize a single field from an object to buffer
Parameters:
OBJECT: The protobuf object which contains the field to be serialized.
FIELD: The field-descriptor describing which field of OBJECT to serialize.
BUFFER: The buffer to serialize to."
(declare (type field-descriptor field))
(let ((kind (proto-kind field)))
(unless
(if (eq kind :extends)
(has-extension object (slot-value field 'external-field-name))
(has-field object (slot-value field 'external-field-name)))
(return-from emit-field 0))
(let* ((type (slot-value field 'class))
(field-num (proto-index field))
(value (cond ((eq kind :extends)
(get-extension object (slot-value field 'external-field-name)))
((proto-lazy-p field)
(slot-value object (slot-value field 'internal-field-name)))
(t
(proto-slot-value object (slot-value field 'external-field-name))))))
(if (eq (proto-label field) :repeated)
(or (emit-repeated-field value type (proto-packed field) field-num kind buffer)
(unknown-field-type type field object))
(or (emit-non-repeated-field value type field-num kind buffer)
(unknown-field-type type field object))))))
(defun emit-repeated-field (value type packed-p field-num kind buffer)
"Serialize a repeated field to buffer. Return nil on failure.
Parameters:
VALUE: The data to serialize, e.g. the data resulting from calling read-slot on a field.
TYPE: The proto-class of the field.
PACKED-P: Whether or not the field in question is packed.
FIELD-NUM: The number of the field (used for making tags).
KIND: The kind of being being emitted. See `proto-kind'.
BUFFER: The buffer to write to."
(declare (field-number field-num) (buffer buffer))
(let (desc)
(cond ((and packed-p (packed-type-p type))
;; Handle scalar types. proto-packed-p of enum types returns nil,
;; so packed enum fields are handled below.
(serialize-packed value type field-num buffer))
((scalarp type)
(let ((tag (make-tag type field-num))
(size 0))
(doseq (v value)
(iincf size (serialize-scalar v type tag buffer)))
size))
((setq desc (find-message-descriptor type))
(emit-repeated-message-field desc value type field-num kind buffer))
((setq desc (find-enum-descriptor type))
(if packed-p
(serialize-packed-enum value (enum-descriptor-values desc) field-num buffer)
(let ((tag (make-wire-tag $wire-type-varint field-num))
(size 0))
(doseq (name value)
(iincf size (serialize-enum name (enum-descriptor-values desc) tag buffer)))
size))))))
(defun emit-repeated-message-field (msg-desc messages type field-num kind buffer)
"Serialize a repeated message (or group) field.
Parameters:
MSG-DESC: A message-descriptor for the message or group type.
MESSAGES: The messages (or groups) to serialize.
TYPE: The symbol naming the message type.
FIELD-NUM: The number of the field being serialized.
KIND: The kind of field being emitted. See `proto-kind'.
BUFFER: The buffer to write to.
Returns: The number of bytes output to BUFFER."
(declare (message-descriptor msg-desc)
(field-number field-num)
(buffer buffer))
(let ((size 0))
(declare (fixnum size))
(if (eq kind :group)
(let ((tag1 (make-wire-tag $wire-type-start-group field-num))
(tag2 (make-wire-tag $wire-type-end-group field-num))
(fields (proto-fields msg-desc)))
(doseq (group messages)
(iincf size (encode-uint32 tag1 buffer))
(dolist (field fields)
(iincf size (emit-field group field buffer)))
(iincf size (encode-uint32 tag2 buffer))))
;; I don't understand this at all - if there is a slot, then the slot
;; holds a list of objects, otherwise just serialize this object?
(let ((tag (make-wire-tag $wire-type-string field-num))
(custom-serializer (custom-serializer type)))
(doseq (msg messages)
;; To serialize an embedded message, first say that it's
;; a string, then encode its size, then serialize its fields.
(iincf size (encode-uint32 tag buffer))
;; If MSG has %%BYTES bound, then it is a lazy field, and BYTES is
;; the pre-computed serialization of MSG, so output that.
(let ((precomputed-bytes (and (slot-exists-p msg '%%bytes)
(proto-%%bytes msg)))
(submessage-size 0))
(with-placeholder (buffer) ; reserve space for submessage-size in buffer
(cond (precomputed-bytes
(setq submessage-size (length precomputed-bytes))
(buffer-ensure-space buffer submessage-size)
(fast-octets-out buffer precomputed-bytes))
(custom-serializer
(setq submessage-size
(funcall custom-serializer msg buffer)))
(t
(setq submessage-size
(serialize-message msg msg-desc buffer))))
(iincf size (+ (backpatch submessage-size) submessage-size)))))))
size))
(defun emit-non-repeated-field (value type field-num kind buffer)
"Serialize a non-repeated field to buffer.
Parameters:
VALUE: The data to serialize, e.g. the data resulting from calling read-slot on a field.
TYPE: The :class slot of the field.
FIELD-NUM: The number of the field being serialized (used for making tags).
KIND: The kind of field being emitted. See `proto-kind'.
BUFFER: The buffer to write to.
Returns: The number of bytes output to BUFFER, or NIL on error."
(declare (field-number field-num)
(buffer buffer))
(let (desc)
(cond ((scalarp type)
(serialize-scalar value type (make-tag type field-num) buffer))
((setq desc (find-message-descriptor type))
(emit-non-repeated-message-field desc value type field-num kind buffer))
((setq desc (find-enum-descriptor type))
(serialize-enum value (enum-descriptor-values desc)
(make-wire-tag $wire-type-varint field-num)
buffer))
((setq desc (find-map-descriptor type))
(let* ((tag (make-wire-tag $wire-type-string field-num))
(key-type (proto-key-type desc))
(val-type (proto-value-type desc))
(val-kind (proto-value-kind desc)))
(flet ((serialize-pair (k v)
(let ((ret-len (encode-uint32 tag buffer))
(map-len 0))
(with-placeholder (buffer)
;; Key types are always scalar, so serialize-scalar works.
(iincf map-len (serialize-scalar k key-type
(make-tag key-type 1) buffer))
;; Value types are arbitrary, non-map, non-repeated.
(iincf map-len (emit-non-repeated-field v val-type 2 val-kind buffer))
(i+ ret-len (i+ map-len (backpatch map-len)))))))
(loop for k being the hash-keys of value
using (hash-value v)
sum (serialize-pair k v))))))))
(defun emit-non-repeated-message-field (msg-desc msg type field-num kind buffer)
"Serialize a non-repeated message field to buffer.
Parameters:
MSG-DESC: The message-descriptor for MSG.
MSG: The data to serialize.
TYPE: The :class slot of the field.
FIELD-NUM: The number of the field being serialized (used for making tags).
KIND: The kind of field being emitted. See `proto-kind'.
BUFFER: The buffer to write to.
Returns: The number of bytes output to BUFFER, or NIL on error."
(cond ((not msg)
0)
((eq kind :group)
(let ((tag1 (make-wire-tag $wire-type-start-group field-num))
(tag2 (make-wire-tag $wire-type-end-group field-num))
(size 0))
(iincf size (encode-uint32 tag1 buffer))
(dolist (f (proto-fields msg-desc))
(iincf size (emit-field msg f buffer)))
(i+ size (encode-uint32 tag2 buffer))))
(t
;; If MSG has %%BYTES bound, then it is a lazy field, and %%BYTES is
;; the pre-computed serialization of MSG, so output that.
(let ((precomputed-bytes (and (slot-exists-p msg '%%bytes)
(proto-%%bytes msg)))
(custom-serializer (custom-serializer type))
(tag-size (encode-uint32 (make-wire-tag $wire-type-string field-num) buffer))
(submessage-size 0))
(with-placeholder (buffer)
(cond (precomputed-bytes
(setq submessage-size (length precomputed-bytes))
(buffer-ensure-space buffer submessage-size)
(fast-octets-out buffer precomputed-bytes))
(custom-serializer
(setq submessage-size
(funcall custom-serializer msg buffer)))
(t
(setq submessage-size
(serialize-message msg msg-desc buffer))))
(+ tag-size (backpatch submessage-size) submessage-size))))))
;;; Deserialization
(defun deserialize-from-stream (type stream)
"Deserialize an object of type TYPE from STREAM."
(let* ((size (file-length stream))
(buffer (make-byte-vector size)))
(read-sequence buffer stream)
(deserialize-from-bytes type buffer)))
(defun deserialize-from-bytes (type buffer &optional (start 0) (end (length buffer)))
"Deserialize an object of type TYPE from BUFFER, which is a simple
array of (unsigned-byte 8).
TYPE is a symbol naming the type to be deserialized.
START is the first byte.
END is the last byte plus one.
Returns two values: the new object and the final index into BUFFER."
(check-type type symbol)
(let ((fast-function
#-sbcl (get type :deserialize)
#+sbcl (when (fboundp `(:protobuf :deserialize ,type))
(fdefinition `(:protobuf :deserialize ,type)))))
(if fast-function
(funcall (the function fast-function) buffer start end)
(%deserialize type buffer start end))))
;; Allow clients to add their own methods.
;; For example, you might want to preserve object identity.
;; (Named with leading % for historical reasons. That could be fixed now
;; and this could be exported.)
(defgeneric %deserialize (type buffer start end &optional end-tag)
(:documentation
"Deserialize an object of type TYPE from BUFFER between indices START and END.
TYPE is the Lisp name of a Protobufs message (usually the name of a
Lisp class) or a 'message-descriptor'.
END-TAG is used internally to handle the (deprecated) \"group\" feature.
The return values are the object and the index at which deserialization stopped."))
(defmethod %deserialize (type buffer start end &optional (end-tag 0))
(let ((message (find-message-descriptor type :error-p t)))
(%deserialize message buffer start end end-tag)))
;; The default method uses metadata from the message descriptor.
(defmethod %deserialize ((msg-desc message-descriptor) buffer start end
&optional (end-tag 0))
(let* ((class-name
(or (proto-alias-for msg-desc) (proto-class msg-desc)))
(class (find-class class-name)))
(deserialize-structure-object
msg-desc buffer start end end-tag class)))
(defstruct (field (:constructor make-field (index offset bool-index oneof-p initarg complex-field))
(:print-object
(lambda (self stream)
(format stream "#<~D~S>" (field-index self) (field-initarg self)))))
"Field metadata for a protocol buffer.
Contains the INDEX of the field as according to protobuf, an internal
OFFSET, the BOOL-INDEX (for simple boolean fields), a flag ONEOF-P which indicates if the field
is part of a oneof, the INITARG, the COMPLEX-FIELD datastructure.
See field-descriptor for the distinction between index, offset, and bool-number."
(index -1 :type field-number) ; TODO(cgay): rename to field-number
offset
bool-index
oneof-p
initarg
complex-field)
;; Make a map from field number to a FIELD structure in a vector.
;; As long as at least half of the vector elements will not be wasted,
;; the lookup is direct by field number, otherwise it is a hash-like lookup.
;; For consecutive indices starting at 1, direct lookup is always used.
;; Consecutive numbers starting at other than 1 could in theory be
;; direct-mapped by subtracting the "origin" but such usage is uncommon,
;; and the performance of the hash-based lookup as a fallback is adequate.
(defun make-field-map (fields)
(declare (inline make-field))
#+(and sbcl arena-allocator) (declare (sb-c::tlab :system))
(let ((count 0) (max 0))
(dolist (field fields)
(incf count)
(setf max (max (proto-index field) max)))
(flet ((wrap (field)
(make-field (proto-index field)
(proto-field-offset field)
(proto-bool-index field)
(and (proto-oneof-offset field) t)
(keywordify (proto-internal-field-name field))
field)))
(if (< max (* count 2)) ; direct map
(let ((map (make-array (1+ max) :initial-element nil)))
(setf (svref map 0) t)
(dolist (field fields map)
(setf (svref map (proto-index field)) (wrap field))))
;; hash-based map. a "cheap" computation of a good table modulus,
;; barring a prime-number test, is an odd number achieving 50% load.
(let* ((map (make-array (ash count 1) :initial-element nil))
(modulus (1- (length map))))
(dolist (field fields map)
(let ((bin (1+ (mod (proto-index field) modulus))))
(push (wrap field) (svref map bin)))))))))
;; Given a field-number and a field-map, return the FIELD metadata or NIL.
(defun-inline find-in-field-map (field-number field-map)
(declare (type fixnum field-number))
(if (svref field-map 0)
(unless (>= field-number (length field-map))
(svref field-map field-number))
(let ((modulus (1- (length field-map))))
(dolist (field (svref field-map (1+ (mod field-number modulus))))
(when (= (field-index field) field-number)
(return field))))))
(defun message-field-metadata-vector (message)
"Lazily compute and memoize a field map for message-descriptor
MESSAGE. This is not needed unless the generic deserializer is
executed."
(if (slot-boundp message 'field-vect)
(proto-field-vect message)
(setf (proto-field-vect message)
(make-field-map (append
(proto-fields message)
(loop for oneof in (proto-oneofs message)
append (coerce (oneof-descriptor-fields oneof) 'list)))))))
;; The generic deserializer collects all fields' values before applying the
;; constructor. This is identical to the the way that the
;; optimized-for-speed deserializers work. We collect the fields into an
;; ordered list with higher indices at the front, so that if the next field
;; index exceeds the index at the front of the list, it is known not to have
;; been seen yet; otherwise we scan the list and if absent, insert in the
;; correct place, or append an item into a found cell or replace the cell's
;; contents depending on whether the field is repeatable.
(defun get-field-cell (field-number field-list field-map)
"Return the cell for FIELD-NUMBER in FIELD-LIST, and as a second value,
the new list in case it was modified (as will generally be true for all
non-repeated fields upon seeing them for the first time). FIELD-MAP is a
vector that translates FIELD-NUMBER to a FIELD object. Return NIL and the
original list if FIELD-NUMBER is unknown, though this could easily return a
cell in which to collect raw octets for missing schema fields."
(declare #.$optimize-serialization)
;; FIELD-LIST is maintained as a property list so that it may be passed
;; directly to the structure constructor. This is slightly more work than
;; maintaining an alist, but avoids subsequent rearrangement.
(labels ((new-pair () ; return (#<FIELD> nil) to be spliced in somewhere
(let ((field (find-in-field-map field-number field-map)))
(if field
(list field nil)
(return-from get-field-cell (values nil field-list)))))
(insert-at-front ()
;; Serialization algorithms are encouraged to transmit fields in ascending
;; numerical order, so this should be the most common case.
(let ((pair (new-pair)))
(rplacd (cdr pair) field-list) ; splice in front
;; First return value is the cons cell for the pair,
;; second is the list as a whole, which is now headed by this pair.
(values pair pair)))
(insert-at-end (head &aux (rest (cdr head)))
(if (not rest)
(insert-after head)
(insert-at-end (cdr rest))))
(insert-after (tail)
;; A picture: list is (#<FIELD A> a-val #<FIELD C> c-val #<FIELD D> d-val ...)
;; ^--- TAIL points here
;; to insert newly-seen field B after field A, replace the cdr of TAIL
;; with new-pair, and new-pair's tail with CDR of TAIL
(let ((pair (new-pair)))
(rplacd (cdr pair) (cdr tail))
(rplacd tail pair)
;; As above, first value is the cons cell for the pair,
;; second is the original list since it was destructively altered.
(values pair field-list)))
(insert-in (splice-point &aux (rest (cdr splice-point)))
(if (not rest)
(insert-after splice-point)
;; REST is the head of the next pair in the plist
(let* ((field (car rest))
(index (field-index field)))
(cond ((i> field-number index) ; unseen, and in between two seen indices
(insert-after splice-point))
((i= field-number index) ; a field which has been seen before
(values rest field-list))
(t ; keep on looking
(insert-in (cdr rest))))))))
(if (not field-list)
(insert-at-front)
(let* ((cur-field (find-in-field-map field-number field-map))
(top-field (car field-list))
(index (field-index top-field)))
(if (and cur-field (field-oneof-p cur-field))
;; If a field is part of a oneof, put it at the end of the plist.
;; This is to preserve the behavior that if two fields from the same
;; oneof are recieved on the wire, then only the last one is set. Since
;; this list sorts fields by their index, this information is lost here,
;; so oneofs need to ignore this heuristic.
(insert-at-end (cdr field-list))
(cond ((i> field-number index) ; greater than any field number seen thus far
(insert-at-front))
((i= field-number index) ; a field number which has been seen before
(values field-list field-list))
(t ; keep on looking
(insert-in (cdr field-list)))))))))
(defun-inline make-skipped-byte-vector (skipped-bytes-tuples buffer)
"Take the list of skipped byte in buffer noted by the offsets in
skipped-bytes-tuples and place them in an array that will be returned.
Parameters:
SKIPPED-BYTES-TUPLES: A list of (low . high) offsets into buffer
representing the ranges of bytes that can't be deserialized.
BUFFER: The buffer containing the protobuf message we're deserializing."
(declare (type (simple-array (unsigned-byte 8)) buffer))
(let* ((skipped-bytes-length
(loop for (low . high) in skipped-bytes-tuples
sum (- high low)))
(skipped-bytes (make-array skipped-bytes-length
:element-type '(unsigned-byte 8))))
(loop for current-start = 0 then (i+ current-start
(i- high low))
for (low . high) in skipped-bytes-tuples
do
(replace skipped-bytes buffer
:start1 current-start
:start2 low
:end2 high))
skipped-bytes))
(defun deserialize-structure-object (message buffer index limit end-tag class)
"Deserialize a message.
Parameters:
MESSAGE: The message-descriptor of the data to be deserialized
BUFFER: The buffer to read from.
INDEX: The index of the buffer to read from.
LIMIT: The upper bound of INDEX.
END-TAG: [For groups only] The tag which ends a group.
CLASS: The class which will be created and returned."
(declare (type (simple-array (unsigned-byte 8)) buffer))
(let ((index (or index 0))
(limit (or limit (length buffer)))
;; This quickly translates a field number to its PROTO-FIELD object
;; without using linear scan.
(field-map (message-field-metadata-vector message))
(old-index index)
offset-list extension-list bool-map
initargs initargs-final tag skipped-bytes-tuple)
(loop
(setf old-index index)
(multiple-value-setq (tag index)
(if (i< index limit) (decode-uint32 buffer index) (values 0 index)))
(when (i= tag end-tag)
;; We're done if we've gotten to the end index or
;; we see an end tag that matches a previous group's start tag
;; Note that the default end tag is 0, which is also an end of
;; message marker (there can never be "real" zero tags because
;; field indices start at 1)
(loop for cell on initargs by #'cddr
do
(let* ((field (car cell))
(inner-index (field-offset field))
(bool-index (field-bool-index field))
(initargs (field-initarg field))
;; Get the full metadata from the brief metadata.
(field (field-complex-field field))
(oneof-offset (proto-oneof-offset field)))
(rplaca cell initargs)
(when (eq (proto-label field) :repeated)
(let ((data (nreverse (second cell))))
(setf (second cell)
(if (eq :vector (proto-container field))
(coerce data 'vector) data))))
(cond ((eq (proto-kind field) :extends)
;; If an extension we'll have to set it manually later...
(push `(,(proto-internal-field-name field) ,(second cell))
extension-list))
(bool-index
(push (cons bool-index (second cell)) bool-map)
(when inner-index
(push inner-index offset-list)))
;; Fields contained in a oneof need to be wrapped in
;; a oneof struct.
(oneof-offset
(push (make-oneof
:value (second cell)
:set-field oneof-offset)
initargs-final)
(push (car cell) initargs-final))
;; Otherwise we have to mark is set later.
(t
(push (second cell) initargs-final)
(push (car cell) initargs-final)
(when inner-index
(push inner-index offset-list))))))
(let ((new-struct
;; For SBCL, a defstruct description conveys the constructor name.
;; Otherwise we have to _guess_ the constructor for the object, as
;; we have no idea if MAKE-INSTANCE will actually work.
;; And for #+sbcl, passing the CLASS rather than its name avoids
;; an unecessary detour through the global name->class mapping.
(apply (get-constructor-name (class-name class))
initargs-final)))
;; Most fields in a proto are set above.
;; Special care must be given for extensions,
;; booleans, and bytes we can't deserialize
;; but may be useful later.
;; For example when we receive fields that don't
;; exist in our version of the message
(loop for extension in extension-list do
(set-extension new-struct (first extension) (second extension)))
(when bool-map
(loop with bool-vec = (slot-value new-struct '%%bool-values)
for (bool-index . value) in bool-map do
(setf (bit bool-vec bool-index) (if value 1 0))))
(loop with is-set = (slot-value new-struct '%%is-set)
for offset in offset-list do
(setf (bit is-set offset) 1))
(when skipped-bytes-tuple
(setf (message-%%skipped-bytes new-struct)
(make-skipped-byte-vector skipped-bytes-tuple buffer)))
(return-from deserialize-structure-object
(values new-struct index))))
(multiple-value-bind (cell updated-list)
(get-field-cell (ilogand (iash tag -3) +max-field-number+) initargs field-map)
(setq initargs updated-list)
(if (not cell)
(progn
(setf index (skip-element buffer index tag))
(push (cons old-index index) skipped-bytes-tuple))
;; cell = (#<field> DATA . more) - "more" is the tail of the plist
;; CELL now points to the cons where DATA should go.
(let* ((field (field-complex-field (pop cell)))
(repeated-p (eq (proto-label field) :repeated))
(lazy-p (proto-lazy-p field))
(type (proto-class field))
(data))
;; If we are deseralizing a map type, we want to (create and) add
;; to an existing hash table in the CELL cons.
(let ((map-desc (find-map-descriptor type)))
(if map-desc
(progn
(unless (car cell)
(setf (car cell)
(make-hash-table :test (if (eql (proto-key-type map-desc) 'string)
#'equal
#'eq))))
(let (map-tag map-len key-data start val-data)
(multiple-value-setq (map-len index)
(decode-uint32 buffer index))
(setq start index)
(loop
(when (= index (+ map-len start))
(assert key-data)
(setf (gethash key-data (car cell)) val-data)
(return))
(multiple-value-setq (map-tag index)
(decode-uint32 buffer index))
;; Check if data on the wire is a key
;; Keys are always scalar types, so
;; just deserialize it.
(if (= 1 (ilogand (iash map-tag -3) +max-field-number+))
(multiple-value-setq (key-data index)
(deserialize-scalar (proto-key-type map-desc) buffer index))
;; Otherwise it must be a value, which has
;; arbitrary type.
(multiple-value-setq (val-data index)
(deserialize-structure-object-field
(proto-value-type map-desc) buffer index map-tag nil nil))))))
(rplaca cell
(progn
(multiple-value-setq (data index)
(deserialize-structure-object-field
type buffer index tag repeated-p lazy-p cell))
data))))))))))
(defun deserialize-structure-object-field
(type buffer index tag repeated-p lazy-p &optional (cell nil))
"Deserialize a single field from the wire, and return it.
Parameters:
TYPE: The class of the field to deserialize.
BUFFER: The buffer to deserialize from.
INDEX: The index of the buffer to read.
TAG: The protobuf tag of the field to deserialize.
REPEATED-P: True if and only if the field is repeated
LAZY-P: True if and only if the field is lazy
CELL: [For repeated fields only]: The current list (or vector) of
deserialized objects to add to."
(cond
((scalarp type)
(cond ((and (packed-type-p type)
(length-encoded-tag-p tag))
(multiple-value-bind (data new-index)
(deserialize-packed type buffer index)
;; Multiple occurrences of packed fields must append.
;; All repeating fields will be reversed before calling
;; the structure constructor, so reverse here to counteract.
(values (nreconc data (car cell)) new-index)))
(t
(multiple-value-bind (data new-index)
(deserialize-scalar type buffer index)
(values (if repeated-p (cons data (car cell)) data)
new-index)))))
(t (let ((enum (find-enum-descriptor type)))
(if enum
(cond ((length-encoded-tag-p tag)
(multiple-value-bind (data new-index)
(deserialize-packed-enum (enum-descriptor-values enum)
buffer index)
(values (nreconc data (car cell)) new-index)))
(t
(multiple-value-bind (data new-index)
(deserialize-enum (enum-descriptor-values enum)
buffer index)
(values (if repeated-p (cons data (car cell)) data)
new-index))))
(let* ((submessage (find-message-descriptor type :error-p t))
(deserializer (custom-deserializer type))
(group-p (i= (logand tag 7) $wire-type-start-group))
(end-tag (if group-p
(ilogior $wire-type-end-group
(logand #xfFFFFFF8 tag))
0)))
(if group-p
(multiple-value-bind (obj end)
(cond (deserializer
(funcall deserializer buffer index (length buffer) end-tag))
(t
(%deserialize
submessage buffer index nil end-tag)))
(values (if repeated-p (cons obj (car cell)) obj)
end))
(multiple-value-bind (embedded-msg-len start)
(decode-uint32 buffer index)
(let* ((end (+ start embedded-msg-len))
(deserializer (custom-deserializer type))
(obj
(cond (lazy-p
;; For lazy fields, just store bytes in the %%bytes field.
(make-message-with-bytes type (subseq buffer start end)))
(deserializer
(funcall deserializer buffer
start end end-tag))
(t
(%deserialize
submessage buffer
start end end-tag)))))
(values (if repeated-p (cons obj (car cell)) obj)
end))))))))))
(defun generate-repeated-field-serializer
(class kind index boundp reader vbuf size vector-p &optional (packed-p nil))
"Generate the field serializer for a repeated field
Parameters:
CLASS: The class of the field.
KIND: The kind of field being emitted. See `proto-kind'.
INDEX: The index of the field
BOUNDP: Symbol naming a variable that evaluates to T if this field is set.
READER: Symbol naming a function which returns the field value.
VBUF: Symbol naming the buffer to write to.
SIZE: Symbol naming the variable which keeps track of the serialized length.
VECTOR-P: If true, the field is serialized as a vector. Otherwise, it is a list.
PACKED-P: True if and only if the field is packed."
(let ((vval (gensym "VAL"))
(iterator (if vector-p 'dovector 'dolist))
(msg (and class (not (scalarp class))
(or (find-message-descriptor class)
(find-enum-descriptor class)
(find-map-descriptor class)))))
(cond ((and packed-p (packed-type-p class))
`(iincf ,size (serialize-packed ,reader ',class ,index ,vbuf ,vector-p)))
((scalarp class)
(let ((tag (make-tag class index)))
`(when ,boundp
(,iterator (,vval ,reader)
(iincf ,size (serialize-scalar ,vval ',class ,tag ,vbuf))))))
((typep msg 'message-descriptor)
(if (eq kind :group)
;; The end tag for a group is the field index shifted and
;; and-ed with a constant.
(let ((tag1 (make-wire-tag $wire-type-start-group index))
(tag2 (make-wire-tag $wire-type-end-group index)))
`(when ,boundp
(,iterator (,vval ,reader)
(iincf ,size (encode-uint32 ,tag1 ,vbuf))
(iincf ,size ,(call-pseudo-method :serialize msg vval vbuf))
(iincf ,size (encode-uint32 ,tag2 ,vbuf)))))
(let ((tag (make-wire-tag $wire-type-string index)))
`(when ,boundp
(,iterator (,vval ,reader)
(iincf ,size (encode-uint32 ,tag ,vbuf))
(with-placeholder (,vbuf)
(let ((len ,(call-pseudo-method
:serialize msg vval vbuf)))
(iincf ,size (i+ len (backpatch len))))))))))
((typep msg 'enum-descriptor)
(let ((tag (make-wire-tag $wire-type-varint index)))
(if packed-p
`(iincf ,size
(serialize-packed-enum ,reader '(,@(enum-descriptor-values msg))
,index ,vbuf))
`(when ,boundp
(,iterator (,vval ,reader)
(iincf ,size (serialize-enum
,vval '(,@(enum-descriptor-values msg))
,tag ,vbuf))))))))))
(defun generate-non-repeated-field-serializer (class kind field-num boundp reader vbuf size)
"Generate the field serializer for a non-repeated field
Parameters:
CLASS: The class of the field.
KIND: The kind of field being emitted. See `proto-kind'.
FIELD-NUM: The field number.
BOUNDP: Symbol naming a variable that evaluates to T if this field is set.
READER: Symbol naming a function which returns the field value.
VBUF: Symbol naming the buffer to write to.
SIZE: Symbol naming the variable which keeps track of the serialized length."
(declare (type field-number field-num))
(let ((vval (gensym "VAL"))
(msg (and class
(not (scalarp class))
(or (find-message-descriptor class)
(find-enum-descriptor class)
(find-map-descriptor class)))))
(cond ((scalarp class)
(let ((tag (make-tag class field-num)))
`(when ,boundp
(let ((,vval ,reader))
(iincf ,size (serialize-scalar ,vval ',class ,tag ,vbuf))))))
((typep msg 'message-descriptor)
(if (eq kind :group)
(let ((tag1 (make-wire-tag $wire-type-start-group field-num))
(tag2 (make-wire-tag $wire-type-end-group field-num)))
`(let ((,vval ,reader))
(when ,vval
(iincf ,size (encode-uint32 ,tag1 ,vbuf))
(iincf ,size ,(call-pseudo-method :serialize msg vval vbuf))
(iincf ,size (encode-uint32 ,tag2 ,vbuf)))))
(let ((tag (make-wire-tag $wire-type-string field-num)))
`(let ((,vval ,reader))
(when ,vval
(iincf ,size (encode-uint32 ,tag ,vbuf))
(with-placeholder (,vbuf)
(let ((len ,(call-pseudo-method :serialize msg vval vbuf)))
(iincf ,size (i+ len (backpatch len))))))))))
((typep msg 'enum-descriptor)
(let ((tag (make-wire-tag $wire-type-varint field-num)))
`(when ,boundp
(let ((,vval ,reader))
(iincf ,size (serialize-enum
,vval '(,@(enum-descriptor-values msg))
,tag ,vbuf))))))
((typep msg 'map-descriptor)
(let* ((tag (make-wire-tag $wire-type-string field-num))
(key-type (proto-key-type msg)))
`(when ,boundp
(let ((,vval ,reader))
(flet ((serialize-pair (k v)
(let ((ret-len (encode-uint32 ,tag ,vbuf))
(map-len 0))
(with-placeholder (,vbuf)
(iincf map-len (serialize-scalar k ',key-type
,(make-tag `,key-type 1)
,vbuf))
,(generate-non-repeated-field-serializer
`,(proto-value-type msg) (proto-value-kind msg)
2 'v 'v vbuf 'map-len)
(i+ ret-len (i+ map-len (backpatch map-len)))))))
(iincf ,size (loop for k being the hash-keys of ,vval using (hash-value v)
sum (serialize-pair k v)))))))))))
;;; Compile-time generation of serializers
;;; Type-checking is done at the top-level methods specialized on 'symbol',
;;; so we turn off all type checking at the level of these functions
(defun generate-field-serializer (msg field boundp reader vbuf size)
"Generate the serializer for a field.
Parameters:
MSG: The containing message-descriptor.
FIELD: The field-descriptor for the field to serialize.
BOUNDP: A symbol which evaluates to true if the field is bound.
READER: A symbol which evaluates to the field's data.
VBUF: The buffer to write to.
SIZE: A symbol which stores the number of bytes serialized."
(let* ((class (proto-class field))
(field-num (proto-index field)))
(when reader
(if (eq (proto-label field) :repeated)
(let ((vector-p (eq :vector (proto-container field)))
(packed-p (proto-packed field)))
(or (generate-repeated-field-serializer
class (proto-kind field) field-num boundp reader vbuf size vector-p packed-p)
(unknown-field-type class field msg)))
(or (generate-non-repeated-field-serializer
class (proto-kind field) field-num boundp reader vbuf size)
(unknown-field-type class field msg))))))
;; Note well: keep this in sync with the main 'serialize' method above
(defun generate-serializer-body (message vobj vbuf size)
"Generate the body of a 'serialize' method for the given message.
Parameters:
MESSAGE: The message-descriptor to generate a serializer for.
VOBJ: A gensym'd symbol which will hold the object to be serialized.
VBUF: A gensym'd symbol which will hold the buffer to serialize to.
SIZE: A gensym'd symbol which will hold the number of bytes serialized."
(when (and (null (proto-fields message))
(null (proto-oneofs message)))
(return-from generate-serializer-body nil))
(nreverse
(let (serializers)
(dolist (field (proto-fields message))
(let* ((class (proto-class field))
(msg (and class (not (scalarp class))
(or (find-message-descriptor class)
(find-enum-descriptor class))))
(field-name (proto-external-field-name field))
(extension-p (eq (proto-kind field) :extends))
(reader (if extension-p
`(,field-name ,vobj)
`(,(proto-slot-function-name
(proto-class message) field-name :get)
,vobj)))
(boundp (if extension-p
`(has-extension ,vobj ',field-name)
`(,(proto-slot-function-name
(proto-class message) field-name :internal-has)
,vobj))))
(push (generate-field-serializer msg field boundp reader vbuf size)
serializers)))
(dolist (oneof (proto-oneofs message) serializers)
(push (generate-oneof-serializer message oneof vobj vbuf size)
serializers)))))
(defmacro make-serializer (message-name)
"Create the serializer for a message.
Parameters:
MESSAGE-NAME: The symbol name of a message."
(generate-serializer (find-message-descriptor message-name)))
(defun generate-serializer (message)
(let ((vobj (make-symbol "OBJ"))
(vbuf (make-symbol "BUF"))
(size (make-symbol "SIZE"))
(bytes (make-symbol "BYTES")))
(multiple-value-bind (serializers)
(generate-serializer-body message vobj vbuf size)
(def-pseudo-method :serialize message `(,vobj ,vbuf &aux (,size 0))
`((declare ,$optimize-serialization)
(declare (ignorable ,vobj ,vbuf))
(declare ; maybe allow specification of the type
#+ignore(type ,(proto-class message) ,vobj)
(type fixnum ,size))
(let ((,bytes (proto-%%bytes ,vobj)))
;; If BYTES is bound, then VOBJ is a lazy field, and BYTES is the pre-computed
;; serialization of VOBJ. So, just output that.
(cond
(,bytes
(setf ,size (length ,bytes))
(buffer-ensure-space ,vbuf ,size)
(fast-octets-out ,vbuf ,bytes)
,size)
(t
,@serializers
(incf ,size (emit-skipped-bytes ,vobj ,vbuf))))))))))
(defun generate-oneof-serializer (message oneof vobj vbuf size)
"Creates and returns the code that serializes a oneof.
Parameters:
MESSAGE: The message-descriptor for the containing message.
ONEOF: The oneof-descriptor to create a serializer for.