Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modular canonicalizer #1058

Merged
merged 9 commits into from
Nov 30, 2021
76 changes: 56 additions & 20 deletions src/frontend/Canonicalize.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@ open Core_kernel
open Ast
open Deprecation_analysis

type canonicalizer_settings =
{ deprecations: bool
; parenthesis: bool
; braces: bool
; (* really for the pretty printer but it makes sense to live here *)
inline_includes: bool }

let all =
{deprecations= true; parenthesis= true; inline_includes= true; braces= true}

let rec repair_syntax_stmt user_dists {stmt; smeta} =
match stmt with
| Tilde {arg; distribution= {name; id_loc}; args; truncation} ->
Expand Down Expand Up @@ -141,16 +151,7 @@ and keep_parens {expr; emeta} =

let parens_lval = map_lval_with no_parens ident

let stmt_to_block ({stmt; smeta} : typed_statement) : typed_statement =
match stmt with
| Block _ -> {stmt; smeta}
| _ ->
mk_typed_statement
~stmt:(Block [{stmt; smeta}])
~return_type:smeta.return_type ~loc:smeta.loc

let rec parens_stmt ({stmt; smeta} : typed_statement) : typed_statement =
let parens_block s = parens_stmt (stmt_to_block s) in
let stmt =
match stmt with
| VarDecl
Expand All @@ -165,30 +166,65 @@ let rec parens_stmt ({stmt; smeta} : typed_statement) : typed_statement =
; identifier
; initial_value= Option.map ~f:no_parens init
; is_global }
| While (e, s) -> While (no_parens e, parens_block s)
| While (e, s) -> While (no_parens e, parens_stmt s)
| IfThenElse (e, s1, Some ({stmt= IfThenElse _; _} as s2))
|IfThenElse (e, s1, Some {stmt= Block [({stmt= IfThenElse _; _} as s2)]; _})
->
(* Flatten if ... else if ... constructs *)
IfThenElse (no_parens e, parens_block s1, Some (parens_stmt s2))
IfThenElse (no_parens e, parens_stmt s1, Some (parens_stmt s2))
| IfThenElse (e, s1, s2) ->
IfThenElse (no_parens e, parens_block s1, Option.map ~f:parens_block s2)
IfThenElse (no_parens e, parens_stmt s1, Option.map ~f:parens_stmt s2)
WardBrian marked this conversation as resolved.
Show resolved Hide resolved
| For {loop_variable; lower_bound; upper_bound; loop_body} ->
For
{ loop_variable
; lower_bound= keep_parens lower_bound
; upper_bound= keep_parens upper_bound
; loop_body= parens_block loop_body }
; loop_body= parens_stmt loop_body }
| _ -> map_statement no_parens parens_stmt parens_lval ident stmt in
{stmt; smeta}

let repair_syntax program : untyped_program =
map_program
(repair_syntax_stmt (userdef_distributions program.functionblock))
let rec blocks_stmt ({stmt; smeta} : typed_statement) : typed_statement =
let stmt_to_block ({stmt; smeta} : typed_statement) : typed_statement =
match stmt with
| Block _ -> blocks_stmt {stmt; smeta}
| _ ->
blocks_stmt
@@ mk_typed_statement
~stmt:(Block [{stmt; smeta}])
~return_type:smeta.return_type ~loc:smeta.loc in
let stmt =
match stmt with
| While (e, s) -> While (e, stmt_to_block s)
| IfThenElse (e, s1, Some ({stmt= IfThenElse _; _} as s2))
|IfThenElse (e, s1, Some {stmt= Block [({stmt= IfThenElse _; _} as s2)]; _})
->
(* Flatten if ... else if ... constructs *)
IfThenElse (e, stmt_to_block s1, Some (blocks_stmt s2))
| IfThenElse (e, s1, s2) ->
IfThenElse (e, stmt_to_block s1, Option.map ~f:stmt_to_block s2)
| For ({loop_body; _} as f) ->
For {f with loop_body= stmt_to_block loop_body}
| _ -> map_statement ident blocks_stmt ident ident stmt in
{stmt; smeta}

let repair_syntax program settings =
if settings.deprecations then
program
|> map_program
(repair_syntax_stmt (userdef_distributions program.functionblock))
else program
Comment on lines +208 to +213
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm inclined to say that repair_syntax should be enabled for all formatting, not just deprecations. It's not about syntax that's going to be deprecated but syntax that has already been deprecated.. A program that "needs" repair_syntax isn't even going to typecheck without it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, those programs do fail to typecheck without --print-canonical. If we don't like that we could run it if and only if the formatter is activated, I suppose.


let canonicalize_program program : typed_program =
let canonicalize_program program settings : typed_program =
let program =
if settings.deprecations then
program
|> map_program
(replace_deprecated_stmt (collect_userdef_distributions program))
else program in
let program =
if settings.parenthesis then program |> map_program parens_stmt else program
in
let program =
if settings.braces then program |> map_program blocks_stmt else program
in
program
|> map_program
(replace_deprecated_stmt (collect_userdef_distributions program))
|> map_program parens_stmt
74 changes: 54 additions & 20 deletions src/stanc/stanc.ml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,15 @@ let model_file = ref ""
let pretty_print_program = ref false
let print_info_json = ref false
let filename_for_msg = ref ""
let canonicalize_program = ref false

let canonicalize_settings =
ref
Canonicalize.
{ inline_includes= false
; deprecations= false
; parenthesis= false
; braces= false }

let print_model_cpp = ref false
let dump_mir = ref false
let dump_mir_pretty = ref false
Expand All @@ -35,6 +43,21 @@ let warn_uninitialized = ref false
let warn_pedantic = ref false
let bare_functions = ref false

let parse_canonical_options (settings : Canonicalize.canonicalizer_settings)
string =
match String.lowercase string with
| "includes" -> {settings with inline_includes= true}
| "deprecations" -> {settings with deprecations= true}
| "parenthesis" -> {settings with parenthesis= true}
| "braces" -> {settings with braces= true}
| s ->
raise
@@ Arg.Bad
( "Unrecognized canonicalizer option '" ^ s
^ "'. \n\
Should be one of 'includes', 'deprecations', 'parenthesis', \
'braces'" )

(** Some example command-line options here *)
let options =
Arg.align
Expand Down Expand Up @@ -92,9 +115,22 @@ let options =
; ( "--auto-format"
, Arg.Set pretty_print_program
, " Pretty prints the program to the console" )
; ( "--canonicalize"
, Arg.String
(fun s ->
let settings =
List.fold ~f:parse_canonical_options ~init:!canonicalize_settings
(String.split s ~on:',') in
canonicalize_settings := settings )
, " Enable specific canonicalizations in a comma seperated list. Options \
are 'includes', 'deprecations', 'parenthesis', 'braces'." )
; ( "--print-canonical"
, Arg.Set canonicalize_program
, " Prints the canonicalized program to the console" )
, Arg.Unit
(fun () ->
pretty_print_program := true ;
canonicalize_settings := Canonicalize.all )
, " Prints the canonicalized program to the console. Equivalent to \
--auto-format --canonicalize [all options]" )
; ( "--version"
, Arg.Unit
(fun _ ->
Expand Down Expand Up @@ -167,11 +203,12 @@ let print_deprecated_arg_warning =
Please use --include-paths.\n"

let model_file_err () =
Arg.usage options ("Please specify one model_file.\n\n" ^ usage) ;
Arg.usage options ("Please specify a model_file.\n" ^ usage) ;
exit 127

let add_file filename =
if !model_file = "" then model_file := filename else model_file_err ()
if !model_file = "" then model_file := filename
else raise (Arg.Bad "Please specify only one model_file")

let remove_dotstan s =
if String.is_suffix ~suffix:".stanfunctions" s then String.drop_suffix s 14
Expand All @@ -184,7 +221,6 @@ let remove_dotstan s =
Fmt.flush and various other hacks to no avail. So now I use Fmt to build a
string, and Out_channel to write it.
*)

let pp_stderr formatter formatee =
Fmt.str "%a" formatter formatee |> Out_channel.(output_string stderr)

Expand All @@ -195,32 +231,30 @@ let print_or_write data =
let use_file filename =
let ast =
Frontend_utils.get_ast_or_exit filename
~print_warnings:(not !canonicalize_program)
~print_warnings:(not !canonicalize_settings.deprecations)
~bare_functions:!bare_functions in
let ast =
if !canonicalize_program then Canonicalize.repair_syntax ast else ast in
(* must be before typecheck to fix up deprecated syntax which gets rejected *)
let ast = Canonicalize.repair_syntax ast !canonicalize_settings in
Debugging.ast_logger ast ;
if !pretty_print_program && not !canonicalize_program then
print_or_write
(Pretty_printing.pretty_print_program ~bare_functions:!bare_functions ast) ;
let typed_ast = Frontend_utils.type_ast_or_exit ast in
let canonical_ast =
Canonicalize.canonicalize_program typed_ast !canonicalize_settings in
if !pretty_print_program then
print_or_write
(Pretty_printing.pretty_print_typed_program
~bare_functions:!bare_functions canonical_ast ) ;
if !print_info_json then (
print_endline (Info.info typed_ast) ;
print_endline (Info.info canonical_ast) ;
exit 0 ) ;
let printed_filename =
match !filename_for_msg with "" -> None | s -> Some s in
if not !canonicalize_program then
if not !canonicalize_settings.deprecations then
Warnings.pp_warnings Fmt.stderr ?printed_filename
(Deprecation_analysis.collect_warnings typed_ast) ;
if !canonicalize_program then
print_or_write
(Pretty_printing.pretty_print_typed_program
~bare_functions:!bare_functions
(Canonicalize.canonicalize_program typed_ast) ) ;
if !generate_data then
print_endline (Debug_data_generation.print_data_prog typed_ast) ;
Debugging.typed_ast_logger typed_ast ;
if not (!pretty_print_program || !canonicalize_program) then (
if not !pretty_print_program then (
let mir = Ast_to_Mir.trans_prog filename typed_ast in
if !dump_mir then
Sexp.pp_hum Format.std_formatter [%sexp (mir : Middle.Program.Typed.t)] ;
Expand Down
2 changes: 1 addition & 1 deletion src/stancjs/stancjs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ let stan2cpp model_name model_string is_flag_set =
( Result.Ok
(Pretty_printing.pretty_print_typed_program
~bare_functions:(is_flag_set "functions-only")
(Canonicalize.canonicalize_program typed_ast) )
(Canonicalize.canonicalize_program typed_ast Canonicalize.all) )
, warnings
, [] ) ;
if is_flag_set "debug-generate-data" then
Expand Down