diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 1ee5c16..9cadc72 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -507,13 +507,8 @@ impl<'input> Lexer<'input> { self.push_next_char(&mut hexadecimal_literal, c, &mut offset); } _ => { - let hexadecimal_literal = - i64::from_str_radix(&*hexadecimal_literal, 16).unwrap(); - return Some(Ok(( - start, - Token::IntegerLiteral(hexadecimal_literal), - offset, - ))); + let token = to_integer_literal(&hexadecimal_literal, 16); + return Some(Ok((start, token, offset))); } } }, @@ -645,7 +640,7 @@ impl<'input> Lexer<'input> { if c > '7' { if !self.lookahead_for_decimal_point() { - return Some(Ok((start, Token::IntegerLiteral(0), offset - 1))); + return Some(Ok((start, Token::SignedIntegerLiteral(0), offset - 1))); } self.chars.next(); @@ -664,12 +659,8 @@ impl<'input> Lexer<'input> { Some(&(_, c @ '0'...'9')) => { if c > '7' { if !self.lookahead_for_decimal_point() { - let literal = i64::from_str_radix(&*literal, 8).unwrap(); - return Some(Ok(( - start, - Token::IntegerLiteral(literal), - offset, - ))); + let token = to_integer_literal(&literal, 8); + return Some(Ok((start, token, offset))); } self.push_next_char(&mut literal, c, &mut offset); @@ -714,8 +705,8 @@ impl<'input> Lexer<'input> { ); } _ => { - let literal = i64::from_str_radix(&*literal, 8).unwrap(); - return Some(Ok((start, Token::IntegerLiteral(literal), offset))); + let token = to_integer_literal(&literal, 8); + return Some(Ok((start, token, offset))); } } } @@ -738,7 +729,7 @@ impl<'input> Lexer<'input> { FloatLexState::ImmediatelyAfterExponentBase, ) } - _ => Some(Ok((start, Token::IntegerLiteral(0), start + 1))), + _ => Some(Ok((start, Token::SignedIntegerLiteral(0), start + 1))), }, c => { literal.push(c); @@ -770,8 +761,8 @@ impl<'input> Lexer<'input> { ); } _ => { - let literal = literal.parse::().unwrap(); - return Some(Ok((start, Token::IntegerLiteral(literal), offset))); + let token = to_integer_literal(&literal, 10); + return Some(Ok((start, token, offset))); } } } @@ -793,7 +784,8 @@ impl<'input> Lexer<'input> { /// Continues lexing negative infinity. The call assumption is that `-` has already been /// lexed with the assumption that "I" follows. fn lex_negative_infinity(&mut self, start: usize) -> Option<::Item> { - let infinity = self.chars + let infinity = self + .chars .clone() .take(8) .map(|(_, c)| c) @@ -875,6 +867,16 @@ impl<'input> Iterator for Lexer<'input> { } } +// Converts the string literal into either a `i64` or `u64`, preferring `i64`. This will panic if +// the literal string is too big to fit. +fn to_integer_literal(literal: &str, radix: u32) -> Token { + if let Ok(literal) = i64::from_str_radix(literal, radix) { + Token::SignedIntegerLiteral(literal) + } else { + Token::UnsignedIntegerLiteral(u64::from_str_radix(literal, radix).unwrap()) + } +} + #[cfg(test)] mod test { use super::*; @@ -900,15 +902,17 @@ mod test { assert_lex("/* this is a comment */", vec![]); assert_lex( "/* this is a comment", - vec![ - Err(create_error(LexicalErrorCode::ExpectedCommentBlockEnd, 20)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedCommentBlockEnd, + 20, + ))], ); assert_lex( "/* this is a comment*", - vec![ - Err(create_error(LexicalErrorCode::ExpectedCommentBlockEnd, 21)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedCommentBlockEnd, + 21, + ))], ); } @@ -944,21 +948,24 @@ mod test { assert_lex("041e+9", vec![Ok((0, Token::FloatLiteral(41e+9), 6))]); assert_lex( "021e", - vec![ - Err(create_error(LexicalErrorCode::ExpectedFloatExponent, 4)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedFloatExponent, + 4, + ))], ); assert_lex( "01e+", - vec![ - Err(create_error(LexicalErrorCode::ExpectedFloatExponent, 4)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedFloatExponent, + 4, + ))], ); assert_lex( "01e-", - vec![ - Err(create_error(LexicalErrorCode::ExpectedFloatExponent, 4)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedFloatExponent, + 4, + ))], ); // Without leading 0 @@ -972,21 +979,24 @@ mod test { assert_lex("41e+9", vec![Ok((0, Token::FloatLiteral(41e+9), 5))]); assert_lex( "21e", - vec![ - Err(create_error(LexicalErrorCode::ExpectedFloatExponent, 3)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedFloatExponent, + 3, + ))], ); assert_lex( "1e+", - vec![ - Err(create_error(LexicalErrorCode::ExpectedFloatExponent, 3)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedFloatExponent, + 3, + ))], ); assert_lex( "1e-", - vec![ - Err(create_error(LexicalErrorCode::ExpectedFloatExponent, 3)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedFloatExponent, + 3, + ))], ); // With leading decimal point @@ -1045,55 +1055,161 @@ mod test { #[test] fn lex_integer_literal() { // Decimal - assert_lex("1", vec![Ok((0, Token::IntegerLiteral(1), 1))]); - assert_lex("9624", vec![Ok((0, Token::IntegerLiteral(9624), 4))]); - assert_lex("-1", vec![Ok((0, Token::IntegerLiteral(-1), 2))]); - assert_lex("-9624", vec![Ok((0, Token::IntegerLiteral(-9624), 5))]); + assert_lex("1", vec![Ok((0, Token::SignedIntegerLiteral(1), 1))]); + assert_lex("9624", vec![Ok((0, Token::SignedIntegerLiteral(9624), 4))]); + assert_lex("-1", vec![Ok((0, Token::SignedIntegerLiteral(-1), 2))]); + assert_lex( + "-9624", + vec![Ok((0, Token::SignedIntegerLiteral(-9624), 5))], + ); // Hexadecimal - assert_lex("0x0", vec![Ok((0, Token::IntegerLiteral(0x0), 3))]); + assert_lex("0x0", vec![Ok((0, Token::SignedIntegerLiteral(0x0), 3))]); assert_lex( "0x1234FF", - vec![Ok((0, Token::IntegerLiteral(0x1234FF), 8))], + vec![Ok((0, Token::SignedIntegerLiteral(0x1234FF), 8))], ); assert_lex( "0x", - vec![ - Err(create_error(LexicalErrorCode::ExpectedHexadecimalDigit, 2)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedHexadecimalDigit, + 2, + ))], ); - assert_lex("-0x0", vec![Ok((0, Token::IntegerLiteral(0x0), 4))]); + assert_lex("-0x0", vec![Ok((0, Token::SignedIntegerLiteral(0x0), 4))]); assert_lex( "-0x1234FF", - vec![Ok((0, Token::IntegerLiteral(-0x1234FF), 9))], + vec![Ok((0, Token::SignedIntegerLiteral(-0x1234FF), 9))], ); assert_lex( "-0x", - vec![ - Err(create_error(LexicalErrorCode::ExpectedHexadecimalDigit, 3)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedHexadecimalDigit, + 3, + ))], ); // Octal - assert_lex("0", vec![Ok((0, Token::IntegerLiteral(0), 1))]); - assert_lex("0624", vec![Ok((0, Token::IntegerLiteral(0o624), 4))]); - assert_lex("-0624", vec![Ok((0, Token::IntegerLiteral(-0o624), 5))]); + assert_lex("0", vec![Ok((0, Token::SignedIntegerLiteral(0), 1))]); + assert_lex("0624", vec![Ok((0, Token::SignedIntegerLiteral(0o624), 4))]); + assert_lex( + "-0624", + vec![Ok((0, Token::SignedIntegerLiteral(-0o624), 5))], + ); // Octal integer literal followed by non-octal digits. assert_lex( "08", vec![ - Ok((0, Token::IntegerLiteral(0), 1)), - Ok((1, Token::IntegerLiteral(8), 2)), + Ok((0, Token::SignedIntegerLiteral(0), 1)), + Ok((1, Token::SignedIntegerLiteral(8), 2)), ], ); assert_lex( "01238", vec![ - Ok((0, Token::IntegerLiteral(0o123), 4)), - Ok((4, Token::IntegerLiteral(8), 5)), + Ok((0, Token::SignedIntegerLiteral(0o123), 4)), + Ok((4, Token::SignedIntegerLiteral(8), 5)), ], ); + + // Max/Min + assert_lex( + "18446744073709551615", + vec![Ok(( + 0, + Token::UnsignedIntegerLiteral(18446744073709551615), + 20, + ))], + ); + assert_lex( + "01777777777777777777777", + vec![Ok(( + 0, + Token::UnsignedIntegerLiteral(18446744073709551615), + 23, + ))], + ); + assert_lex( + "0xFFFFFFFFFFFFFFFF", + vec![Ok(( + 0, + Token::UnsignedIntegerLiteral(18446744073709551615), + 18, + ))], + ); + assert_lex( + "9223372036854775807", + vec![Ok(( + 0, + Token::SignedIntegerLiteral(9223372036854775807), + 19, + ))], + ); + assert_lex( + "9223372036854775808", + vec![Ok(( + 0, + Token::UnsignedIntegerLiteral(9223372036854775808), + 19, + ))], + ); + assert_lex( + "0777777777777777777777", + vec![Ok(( + 0, + Token::SignedIntegerLiteral(9223372036854775807), + 22, + ))], + ); + assert_lex( + "01000000000000000000000", + vec![Ok(( + 0, + Token::UnsignedIntegerLiteral(9223372036854775808), + 23, + ))], + ); + assert_lex( + "0x7FFFFFFFFFFFFFFF", + vec![Ok(( + 0, + Token::SignedIntegerLiteral(9223372036854775807), + 18, + ))], + ); + assert_lex( + "0x8000000000000000", + vec![Ok(( + 0, + Token::UnsignedIntegerLiteral(9223372036854775808), + 18, + ))], + ); + assert_lex( + "-9223372036854775808", + vec![Ok(( + 0, + Token::SignedIntegerLiteral(-9223372036854775808), + 20, + ))], + ); + assert_lex( + "-01000000000000000000000", + vec![Ok(( + 0, + Token::SignedIntegerLiteral(-9223372036854775808), + 24, + ))], + ); + assert_lex( + "-0x8000000000000000", + vec![Ok(( + 0, + Token::SignedIntegerLiteral(-9223372036854775808), + 19, + ))], + ); } #[test] @@ -1240,15 +1356,18 @@ mod test { fn lex_string() { assert_lex( r#""this is a string""#, - vec![ - Ok((0, Token::StringLiteral("this is a string".to_string()), 18)), - ], + vec![Ok(( + 0, + Token::StringLiteral("this is a string".to_string()), + 18, + ))], ); assert_lex( r#""this is a string"#, - vec![ - Err(create_error(LexicalErrorCode::ExpectedStringLiteralEnd, 18)), - ], + vec![Err(create_error( + LexicalErrorCode::ExpectedStringLiteralEnd, + 18, + ))], ); } diff --git a/src/lexer/token.rs b/src/lexer/token.rs index c57791d..140c682 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -77,9 +77,10 @@ pub enum Token { // Regular expressions FloatLiteral(f64), Identifier(String), - IntegerLiteral(i64), OtherLiteral(char), + SignedIntegerLiteral(i64), StringLiteral(String), + UnsignedIntegerLiteral(u64), // Symbols Colon, diff --git a/src/parser/ast.rs b/src/parser/ast.rs index c207a89..8c3fd19 100644 --- a/src/parser/ast.rs +++ b/src/parser/ast.rs @@ -134,8 +134,9 @@ pub enum ConstType { pub enum ConstValue { BooleanLiteral(bool), FloatLiteral(f64), - IntegerLiteral(i64), Null, + SignedIntegerLiteral(i64), + UnsignedIntegerLiteral(u64), } #[derive(Clone, Debug, PartialEq)] @@ -396,9 +397,10 @@ pub enum Other { FloatLiteral(f64), Identifier(Identifier), - IntegerLiteral(i64), OtherLiteral(char), + SignedIntegerLiteral(i64), StringLiteral(String), + UnsignedIntegerLiteral(u64), Colon, Ellipsis, @@ -611,25 +613,23 @@ mod test { #[test] fn test_flatten_asts() { - let ast1 = vec![ - Definition::Interface(Interface::NonPartial(NonPartialInterface { + let ast1 = vec![Definition::Interface(Interface::NonPartial( + NonPartialInterface { extended_attributes: vec![], inherits: None, members: vec![], name: "Node".to_string(), - })), - ]; - let ast2 = vec![ - Definition::Typedef(Typedef { + }, + ))]; + let ast2 = vec![Definition::Typedef(Typedef { + extended_attributes: vec![], + name: "Typedef".to_string(), + type_: Box::new(Type { extended_attributes: vec![], - name: "Typedef".to_string(), - type_: Box::new(Type { - extended_attributes: vec![], - kind: TypeKind::Any, - nullable: false, - }), + kind: TypeKind::Any, + nullable: false, }), - ]; + })]; assert_eq!( flatten_asts(vec![ast1.clone(), ast2.clone()]), diff --git a/src/parser/grammar.lalrpop b/src/parser/grammar.lalrpop index deb0ea5..c9fcdd8 100644 --- a/src/parser/grammar.lalrpop +++ b/src/parser/grammar.lalrpop @@ -207,7 +207,8 @@ ConstType: (ast::ConstType, bool) = { ConstValue: ast::ConstValue = { BooleanLiteral => ast::ConstValue::BooleanLiteral(<>), FloatLiteral => ast::ConstValue::FloatLiteral(<>), - "IntegerLiteral" => ast::ConstValue::IntegerLiteral(<>), + "SignedIntegerLiteral" => ast::ConstValue::SignedIntegerLiteral(<>), + "UnsignedIntegerLiteral" => ast::ConstValue::UnsignedIntegerLiteral(<>), "null" => ast::ConstValue::Null }; @@ -1134,9 +1135,10 @@ Other: ast::Other = { "FloatLiteral" => ast::Other::FloatLiteral(<>), "Identifier" => ast::Other::Identifier(<>), - "IntegerLiteral" => ast::Other::IntegerLiteral(<>), "OtherLiteral" => ast::Other::OtherLiteral(<>), + "SignedIntegerLiteral" => ast::Other::SignedIntegerLiteral(<>), "StringLiteral" => ast::Other::StringLiteral(<>), + "UnsignedIntegerLiteral" => ast::Other::UnsignedIntegerLiteral(<>), ":" => ast::Other::Colon, "..." => ast::Other::Ellipsis, @@ -1558,9 +1560,10 @@ extern { // Regular expressions "FloatLiteral" => Token::FloatLiteral(), "Identifier" => Token::Identifier(), - "IntegerLiteral" => Token::IntegerLiteral(), "OtherLiteral" => Token::OtherLiteral(), + "SignedIntegerLiteral" => Token::SignedIntegerLiteral(), "StringLiteral" => Token::StringLiteral(), + "UnsignedIntegerLiteral" => Token::UnsignedIntegerLiteral(), // Symbols ":" => Token::Colon, diff --git a/src/parser/visitor/pretty_print.rs b/src/parser/visitor/pretty_print.rs index 22377c5..03a3bc1 100644 --- a/src/parser/visitor/pretty_print.rs +++ b/src/parser/visitor/pretty_print.rs @@ -328,10 +328,13 @@ impl<'ast> ImmutableVisitor<'ast> for PrettyPrintVisitor { ConstValue::FloatLiteral(float_literal) => { self.stringify_float_literal(float_literal); } - ConstValue::IntegerLiteral(integer_literal) => { + ConstValue::Null => self.output.push_str("null"), + ConstValue::SignedIntegerLiteral(integer_literal) => { + self.output.push_str(&*integer_literal.to_string()); + } + ConstValue::UnsignedIntegerLiteral(integer_literal) => { self.output.push_str(&*integer_literal.to_string()); } - ConstValue::Null => self.output.push_str("null"), } } @@ -735,15 +738,18 @@ impl<'ast> ImmutableVisitor<'ast> for PrettyPrintVisitor { self.stringify_float_literal(float_literal); } Other::Identifier(ref identifier) => self.visit_identifier(identifier), - Other::IntegerLiteral(integer_literal) => { + Other::OtherLiteral(other_literal) => self.output.push(other_literal), + Other::SignedIntegerLiteral(integer_literal) => { self.output.push_str(&*integer_literal.to_string()); } - Other::OtherLiteral(other_literal) => self.output.push(other_literal), Other::StringLiteral(ref string_literal) => { self.output.push('"'); self.output.push_str(&*string_literal); self.output.push('"'); } + Other::UnsignedIntegerLiteral(integer_literal) => { + self.output.push_str(&*integer_literal.to_string()); + } Other::Colon => self.output.push(':'), Other::Ellipsis => self.output.push_str("..."), diff --git a/tests/parse_test.rs b/tests/parse_test.rs index 745a1c2..477fa11 100644 --- a/tests/parse_test.rs +++ b/tests/parse_test.rs @@ -29,17 +29,13 @@ fn parse_includes() { assert_eq!( ast, - vec![ - Definition::Includes(Includes { - extended_attributes: vec![ - Box::new(ExtendedAttribute::NoArguments(Other::Identifier( - "test".to_string(), - ))), - ], - includee: "B".to_string(), - includer: "A".to_string(), - }), - ] + vec![Definition::Includes(Includes { + extended_attributes: vec![Box::new(ExtendedAttribute::NoArguments(Other::Identifier( + "test".to_string(), + )))], + includee: "B".to_string(), + includer: "A".to_string(), + })] ); } @@ -58,28 +54,63 @@ fn parse_mixin() { assert_eq!( ast, - vec![ - Definition::Mixin(Mixin::Partial(PartialMixin { - extended_attributes: vec![ - Box::new(ExtendedAttribute::NoArguments(Other::Identifier( - "test".to_string(), - ))), - ], + vec![Definition::Mixin(Mixin::Partial(PartialMixin { + extended_attributes: vec![Box::new(ExtendedAttribute::NoArguments(Other::Identifier( + "test".to_string(), + )))], + members: vec![MixinMember::Attribute(Attribute::Regular( + RegularAttribute { + extended_attributes: vec![], + inherits: false, + name: "entry".to_string(), + read_only: true, + type_: Box::new(Type { + extended_attributes: vec![], + kind: TypeKind::UnsignedShort, + nullable: false, + }), + }, + ))], + name: "Name".to_string(), + }))] + ); +} + +#[test] +fn parse_integer_literals() { + use ast::*; + + let ast = parse_string( + "interface Name { + const unsigned long long max = 18446744073709551615; + const long long min = -9223372036854775808; + };", + ).unwrap(); + + assert_eq!( + ast, + vec![Definition::Interface(Interface::NonPartial( + NonPartialInterface { + extended_attributes: vec![], + inherits: None, members: vec![ - MixinMember::Attribute(Attribute::Regular(RegularAttribute { + InterfaceMember::Const(Const { + extended_attributes: vec![], + name: "max".to_string(), + nullable: false, + type_: ConstType::UnsignedLongLong, + value: ConstValue::UnsignedIntegerLiteral(18446744073709551615), + }), + InterfaceMember::Const(Const { extended_attributes: vec![], - inherits: false, - name: "entry".to_string(), - read_only: true, - type_: Box::new(Type { - extended_attributes: vec![], - kind: TypeKind::UnsignedShort, - nullable: false, - }), - })), + name: "min".to_string(), + nullable: false, + type_: ConstType::SignedLongLong, + value: ConstValue::SignedIntegerLiteral(-9223372036854775808), + }), ], name: "Name".to_string(), - })), - ] + }, + ))] ); }