@@ -19,7 +19,10 @@ use std::mem::MaybeUninit;
1919use std:: ptr:: NonNull ;
2020
2121pub use self :: bindings:: * ;
22- use ruby_prism_sys:: { pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t} ;
22+ use ruby_prism_sys:: {
23+ pm_buffer_free, pm_buffer_init, pm_buffer_length, pm_buffer_t, pm_buffer_value, pm_comment_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_options_free, pm_options_read, pm_options_t,
24+ pm_options_version_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_serialize, pm_serialize_parse,
25+ } ;
2326
2427/// A range in the source file.
2528pub struct Location < ' pr > {
@@ -428,6 +431,8 @@ pub struct ParseResult<'pr> {
428431 source : & ' pr [ u8 ] ,
429432 parser : NonNull < pm_parser_t > ,
430433 node : NonNull < pm_node_t > ,
434+ options_string : Vec < u8 > ,
435+ options : NonNull < pm_options_t > ,
431436}
432437
433438impl < ' pr > ParseResult < ' pr > {
@@ -529,6 +534,16 @@ impl<'pr> ParseResult<'pr> {
529534 pub fn node ( & self ) -> Node < ' _ > {
530535 Node :: new ( self . parser , self . node . as_ptr ( ) )
531536 }
537+
538+ /// Returns the serialized representation of the parse result.
539+ #[ must_use]
540+ pub fn serialize ( & self ) -> Vec < u8 > {
541+ let mut buffer = Buffer :: default ( ) ;
542+ unsafe {
543+ pm_serialize ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) , & mut buffer. buffer ) ;
544+ }
545+ buffer. value ( ) . into ( )
546+ }
532547}
533548
534549impl < ' pr > Drop for ParseResult < ' pr > {
@@ -537,10 +552,176 @@ impl<'pr> Drop for ParseResult<'pr> {
537552 pm_node_destroy ( self . parser . as_ptr ( ) , self . node . as_ptr ( ) ) ;
538553 pm_parser_free ( self . parser . as_ptr ( ) ) ;
539554 drop ( Box :: from_raw ( self . parser . as_ptr ( ) ) ) ;
555+
556+ pm_options_free ( self . options . as_ptr ( ) ) ;
557+ drop ( Box :: from_raw ( self . options . as_ptr ( ) ) ) ;
558+ }
559+ }
560+ }
561+
/// A scope of locals surrounding the code that is being parsed.
#[derive(Clone, Debug, Default)]
pub struct OptionsScope {
    /// Flags for the set of forwarding parameters in this scope.
    pub forwarding_flags: u8,
    /// The names of the locals in the scope.
    pub locals: Vec<String>,
}
572+
573+ /// The options that can be passed to the parser.
574+ #[ allow( clippy:: struct_excessive_bools) ]
575+ #[ derive( Debug , Clone ) ]
576+ pub struct Options {
577+ /** The name of the file that is currently being parsed. */
578+ pub filepath : String ,
579+ /**
580+ * The line within the file that the parse starts on. This value is
581+ * 1-indexed.
582+ */
583+ pub line : i32 ,
584+ /**
585+ * The name of the encoding that the source file is in. Note that this must
586+ * correspond to a name that can be found with Encoding.find in Ruby.
587+ */
588+ pub encoding : String ,
589+ /**
590+ * Whether or not the frozen string literal option has been set.
591+ * May be:
592+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
593+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
594+ * - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
595+ */
596+ pub frozen_string_literal : Option < bool > ,
597+ /** A bitset of the various options that were set on the command line. */
598+ pub command_line : u8 ,
599+ /**
600+ * The version of prism that we should be parsing with. This is used to
601+ * allow consumers to specify which behavior they want in case they need to
602+ * parse exactly as a specific version of CRuby.
603+ */
604+ pub version : pm_options_version_t ,
605+ /**
606+ * Whether or not the encoding magic comments should be respected. This is a
607+ * niche use-case where you want to parse a file with a specific encoding
608+ * but ignore any encoding magic comments at the top of the file.
609+ */
610+ pub encoding_locked : bool ,
611+ /**
612+ * When the file being parsed is the main script, the shebang will be
613+ * considered for command-line flags (or for implicit -x). The caller needs
614+ * to pass this information to the parser so that it can behave correctly.
615+ */
616+ pub main_script : bool ,
617+ /**
618+ * When the file being parsed is considered a "partial" script, jumps will
619+ * not be marked as errors if they are not contained within loops/blocks.
620+ * This is used in the case that you're parsing a script that you know will
621+ * be embedded inside another script later, but you do not have that context
622+ * yet. For example, when parsing an ERB template that will be evaluated
623+ * inside another script.
624+ */
625+ pub partial_script : bool ,
626+ /**
627+ * Whether or not the parser should freeze the nodes that it creates. This
628+ * makes it possible to have a deeply frozen AST that is safe to share
629+ * between concurrency primitives.
630+ */
631+ pub freeze : bool ,
632+ /**
633+ * The scopes surrounding the code that is being parsed. For most parses
634+ * this will be empty, but for evals it will be the locals that are in scope
635+ * surrounding the eval. Scopes are ordered from the outermost scope to the
636+ * innermost one.
637+ */
638+ pub scopes : Vec < OptionsScope > ,
639+ }
640+
641+ impl Default for Options {
642+ fn default ( ) -> Self {
643+ Self {
644+ filepath : String :: new ( ) ,
645+ line : 1 ,
646+ encoding : String :: new ( ) ,
647+ frozen_string_literal : None ,
648+ command_line : 0 ,
649+ version : pm_options_version_t:: PM_OPTIONS_VERSION_LATEST ,
650+ encoding_locked : false ,
651+ main_script : true ,
652+ partial_script : false ,
653+ freeze : false ,
654+ scopes : Vec :: new ( ) ,
655+ }
656+ }
657+ }
658+
659+ impl Options {
660+ #[ allow( clippy:: cast_possible_truncation) ]
661+ fn to_binary_string ( & self ) -> Vec < u8 > {
662+ let mut output = Vec :: new ( ) ;
663+
664+ output. extend ( ( self . filepath . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
665+ output. extend ( self . filepath . as_bytes ( ) ) ;
666+ output. extend ( self . line . to_ne_bytes ( ) ) ;
667+ output. extend ( ( self . encoding . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
668+ output. extend ( self . encoding . as_bytes ( ) ) ;
669+ output. extend ( self . frozen_string_literal . map_or_else ( || 0i8 , |frozen| if frozen { 1 } else { -1 } ) . to_ne_bytes ( ) ) ;
670+ output. push ( self . command_line ) ;
671+ output. extend ( ( self . version as u8 ) . to_ne_bytes ( ) ) ;
672+ output. push ( self . encoding_locked . into ( ) ) ;
673+ output. push ( self . main_script . into ( ) ) ;
674+ output. push ( self . partial_script . into ( ) ) ;
675+ output. push ( self . freeze . into ( ) ) ;
676+ output. extend ( ( self . scopes . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
677+ for scope in & self . scopes {
678+ output. extend ( ( scope. locals . len ( ) as u32 ) . to_ne_bytes ( ) ) ;
679+ output. extend ( scope. forwarding_flags . to_ne_bytes ( ) ) ;
680+ for local in & scope. locals {
681+ output. extend ( ( local. len ( ) as u32 ) . to_ne_bytes ( ) ) ;
682+ output. extend ( local. as_bytes ( ) ) ;
683+ }
684+ }
685+ output
686+ }
687+ }
688+
689+ struct Buffer {
690+ buffer : pm_buffer_t ,
691+ }
692+
693+ impl Default for Buffer {
694+ fn default ( ) -> Self {
695+ let buffer = unsafe {
696+ let mut uninit = MaybeUninit :: < pm_buffer_t > :: uninit ( ) ;
697+ let initialized = pm_buffer_init ( uninit. as_mut_ptr ( ) ) ;
698+ assert ! ( initialized) ;
699+ uninit. assume_init ( )
700+ } ;
701+ Self { buffer }
702+ }
703+ }
704+
705+ impl Buffer {
706+ fn length ( & self ) -> usize {
707+ unsafe { pm_buffer_length ( & self . buffer ) }
708+ }
709+
710+ fn value ( & self ) -> & [ u8 ] {
711+ unsafe {
712+ let value = pm_buffer_value ( & self . buffer ) ;
713+ let value = value. cast :: < u8 > ( ) . cast_const ( ) ;
714+ std:: slice:: from_raw_parts ( value, self . length ( ) )
540715 }
541716 }
542717}
543718
719+ impl Drop for Buffer {
720+ fn drop ( & mut self ) {
721+ unsafe { pm_buffer_free ( & mut self . buffer ) }
722+ }
723+ }
724+
544725/// Parses the given source string and returns a parse result.
545726///
546727/// # Panics
@@ -549,25 +730,56 @@ impl<'pr> Drop for ParseResult<'pr> {
549730///
550731#[ must_use]
551732pub fn parse ( source : & [ u8 ] ) -> ParseResult < ' _ > {
733+ parse_with_options ( source, & Options :: default ( ) )
734+ }
735+
736+ /// Parses the given source string and returns a parse result.
737+ ///
738+ /// # Panics
739+ ///
740+ /// Panics if the parser fails to initialize.
741+ ///
742+ #[ must_use]
743+ pub fn parse_with_options < ' pr > ( source : & ' pr [ u8 ] , options : & Options ) -> ParseResult < ' pr > {
744+ let options_string = options. to_binary_string ( ) ;
552745 unsafe {
553746 let uninit = Box :: new ( MaybeUninit :: < pm_parser_t > :: uninit ( ) ) ;
554747 let uninit = Box :: into_raw ( uninit) ;
555748
556- pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , std:: ptr:: null ( ) ) ;
749+ let options = Box :: into_raw ( Box :: new ( MaybeUninit :: < pm_options_t > :: zeroed ( ) ) ) ;
750+ pm_options_read ( ( * options) . as_mut_ptr ( ) , options_string. as_ptr ( ) . cast ( ) ) ;
751+ let options = NonNull :: new ( ( * options) . assume_init_mut ( ) ) . unwrap ( ) ;
752+
753+ pm_parser_init ( ( * uninit) . as_mut_ptr ( ) , source. as_ptr ( ) , source. len ( ) , options. as_ptr ( ) ) ;
557754
558755 let parser = ( * uninit) . assume_init_mut ( ) ;
559756 let parser = NonNull :: new_unchecked ( parser) ;
560757
561758 let node = pm_parse ( parser. as_ptr ( ) ) ;
562759 let node = NonNull :: new_unchecked ( node) ;
563760
564- ParseResult { source, parser, node }
761+ ParseResult { source, parser, node, options_string, options }
762+ }
763+ }
764+
765+ /// Serializes the given source string and returns a parse result.
766+ ///
767+ /// # Panics
768+ ///
769+ /// Panics if the parser fails to initialize.
770+ #[ must_use]
771+ pub fn serialize_parse ( source : & [ u8 ] , options : & Options ) -> Vec < u8 > {
772+ let mut buffer = Buffer :: default ( ) ;
773+ let opts = options. to_binary_string ( ) ;
774+ unsafe {
775+ pm_serialize_parse ( & mut buffer. buffer , source. as_ptr ( ) , source. len ( ) , opts. as_ptr ( ) . cast ( ) ) ;
565776 }
777+ buffer. value ( ) . into ( )
566778}
567779
568780#[ cfg( test) ]
569781mod tests {
570- use super :: parse;
782+ use super :: { parse, parse_with_options , serialize_parse } ;
571783
572784 #[ test]
573785 fn comments_test ( ) {
@@ -1157,6 +1369,28 @@ end
11571369 assert ! ( ( value - 1.0 ) . abs( ) < f64 :: EPSILON ) ;
11581370 }
11591371
// Checks that `serialize_parse` and `parse_with_options(..).serialize()` agree
// for the same source and options, and that the configured `filepath` shows up
// in the pretty-printed tree for `__FILE__`.
1372+ #[ test]
1373+ fn serialize_parse_test ( ) {
1374+ let source = r#"__FILE__"# ;
1375+ let options = crate :: Options { filepath : "test.rb" . to_string ( ) , ..Default :: default ( ) } ;
1376+ let bytes = serialize_parse ( source. as_ref ( ) , & options) ;
1377+
1378+ let result = parse_with_options ( source. as_bytes ( ) , & options) ;
1379+
// Both serialization paths are expected to produce identical bytes.
1380+ assert_eq ! ( bytes, result. serialize( ) ) ;
1381+
// Expected pretty-printed tree; the source file node should carry "test.rb".
1382+ let expected = r#"@ ProgramNode (location: (1,0)-(1,8))
1383+ +-- locals: []
1384+ +-- statements:
1385+ @ StatementsNode (location: (1,0)-(1,8))
1386+ +-- body: (length: 1)
1387+ +-- @ SourceFileNode (location: (1,0)-(1,8))
1388+ +-- StringFlags: nil
1389+ +-- filepath: "test.rb"
1390+ "# ;
1391+ assert_eq ! ( expected, result. node( ) . pretty_print( ) . as_str( ) ) ;
1392+ }
1393+
11601394 #[ test]
11611395 fn node_field_lifetime_test ( ) {
11621396 // The code below wouldn't typecheck prior to https://github.com/ruby/prism/pull/2519,
0 commit comments