From 0a6e1a46059de639c826e44d47e33a6440758594 Mon Sep 17 00:00:00 2001
From: Priyanshu Dangare
Date: Fri, 12 Jun 2026 01:26:38 +0530
Subject: [PATCH 1/2] Add f-string preprocessing

---
 src/parser.rs        |   8 +-
 src/pre_processor.rs | 275 +++++++++++++++++++++++++++++++++++++++++++
 tools/sb3.py         |   7 +-
 3 files changed, 286 insertions(+), 4 deletions(-)

diff --git a/src/parser.rs b/src/parser.rs
index 46aa913..2889d13 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -68,11 +68,17 @@ fn parse_sprite(tokens: Vec) -> (Sprite, Vec) {
 pub fn parse(translation_unit: &TranslationUnit) -> (Sprite, Vec<Diagnostic>) {
     let (tokens, tokenize_diagnostics) = tokenize(translation_unit);
     let (tokens, preprocess_diagnostic) = preprocess(tokens);
+    if let Some(preprocess_diagnostic) = preprocess_diagnostic {
+        let all_diagnostics = tokenize_diagnostics
+            .into_iter()
+            .chain(Some(preprocess_diagnostic))
+            .collect();
+        return (Sprite::default(), all_diagnostics);
+    }
     let (sprite, parse_diagnostics) = parse_sprite(tokens);
 
     let all_diagnostics = tokenize_diagnostics
         .into_iter()
-        .chain(preprocess_diagnostic)
         .chain(parse_diagnostics)
         .collect();
 
diff --git a/src/pre_processor.rs b/src/pre_processor.rs
index 54877ea..6143c52 100644
--- a/src/pre_processor.rs
+++ b/src/pre_processor.rs
@@ -74,6 +74,10 @@ impl<'a> PreProcessor<'a, '_> {
                 dirty = true;
                 continue;
             }
+            if self.substitute_fstring(span)? {
+                dirty = true;
+                continue;
+            }
             if self.substitute_stringify(span)? {
                 dirty = true;
                 continue;
@@ -624,6 +628,179 @@ impl<'a> PreProcessor<'a, '_> {
 
         Ok(true)
     }
+
+    /// Expands an `f"..."` format string at the cursor into `&`-joined tokens.
+    ///
+    /// Returns `Ok(false)` and leaves the token stream untouched unless the token at
+    /// the cursor is the name `f` and the token after it is a string literal. Otherwise
+    /// `remove_token` is called twice (once per consumed token), the expansion from
+    /// [`Self::tokenize_fmt`] is inserted at the cursor, `span.end` grows by one per
+    /// inserted token, and `Ok(true)` is returned.
+    ///
+    /// NOTE(review): nothing checks that `f` directly touches the string, so `f "x"`
+    /// presumably expands as well; confirm that this is intended.
+    fn substitute_fstring(&mut self, span: &mut Span) -> Result<bool, Diagnostic> {
+        let Token::Name(prefix) = get_token(&self.tokens[*self.i]) else {
+            return Ok(false);
+        };
+        if prefix != "f" {
+            return Ok(false);
+        }
+        let Some(format_token) = self.tokens.get(*self.i + 1) else {
+            return Ok(false);
+        };
+        let Token::Str(format) = get_token(format_token) else {
+            return Ok(false);
+        };
+        let tokens = Self::tokenize_fmt(format, get_span(format_token))?;
+        self.remove_token(span);
+        self.remove_token(span);
+        for (i, token) in (*self.i..).zip(tokens) {
+            self.tokens.insert(i, token);
+            span.end += 1;
+        }
+        Ok(true)
+    }
+
+    /// Splits the contents of a format string into literal and expression tokens.
+    ///
+    /// `{{` and `}}` produce literal braces and `{expr}` is lexed as code; every piece
+    /// after the first is preceded by a `&` token. A format string that produces no
+    /// tokens expands to a single empty string literal. Positions are offset by
+    /// `span.start + 1`, i.e. `format` is assumed to start one byte into `span`.
+    ///
+    /// NOTE(review): an interpolation that lexes to no tokens (such as the `{}` in `a{}`)
+    /// still gets its `&`, which presumably surfaces later as a parser error.
+    ///
+    /// # Errors
+    ///
+    /// Returns a diagnostic for an unterminated `{`, for a lone `}` in literal text, and
+    /// for any error the lexer reports inside an interpolated expression.
+    fn tokenize_fmt(format: &str, span: Span) -> Result<Vec<(usize, Token, usize)>, Diagnostic> {
+        let mut tokens = vec![];
+        let mut literal_start = 0;
+        let mut literal = String::new();
+        let content_start = span.start + 1;
+        let mut chars = format.char_indices().peekable();
+        while let Some((index, char)) = chars.next() {
+            if char == '{' {
+                if chars.peek().is_some_and(|(_, char)| char == &'{') {
+                    chars.next();
+                    if literal.is_empty() {
+                        literal_start = index;
+                    }
+                    literal.push('{');
+                    continue;
+                }
+                if !literal.is_empty() {
+                    // A literal that follows an expression needs its own `&`, as in
+                    // `{x}, {y}`; this is a no-op for the first piece.
+                    Self::append_join(&mut tokens);
+                    tokens.push((
+                        content_start + literal_start,
+                        Token::Str(std::mem::take(&mut literal).into()),
+                        content_start + index,
+                    ));
+                }
+                let expr_start = index + char.len_utf8();
+                let mut expr = String::new();
+                let mut closed = false;
+                while let Some((_, char)) = chars.next() {
+                    // Inside an expression `{{` and `}}` stand for single braces, which
+                    // lets a nested format string be written.
+                    if char == '{' && chars.peek().is_some_and(|(_, char)| char == &'{') {
+                        chars.next();
+                        expr.push('{');
+                        continue;
+                    }
+                    if char == '}' && chars.peek().is_some_and(|(_, char)| char == &'}') {
+                        let mut lookahead = chars.clone();
+                        lookahead.next();
+                        if lookahead.peek().is_some_and(|(_, char)| char == &'}') {
+                            // `}}}`: this brace closes the expression and the pair
+                            // after it is left for the outer loop as a literal `}`.
+                            closed = true;
+                            break;
+                        }
+                        chars.next();
+                        expr.push('}');
+                        continue;
+                    }
+                    if char == '}' {
+                        closed = true;
+                        break;
+                    }
+                    expr.push(char);
+                }
+                if !closed {
+                    return Err(Diagnostic {
+                        kind: DiagnosticKind::UnrecognizedEof(vec![]),
+                        span,
+                    });
+                }
+                Self::append_join(&mut tokens);
+                for result in crate::lexer::adaptor::Lexer::new(&expr) {
+                    match result {
+                        Ok((start, token, end)) => {
+                            tokens.push((
+                                content_start + expr_start + start,
+                                token,
+                                content_start + expr_start + end,
+                            ));
+                        }
+                        Err(diagnostic) => {
+                            return Err(Diagnostic {
+                                kind: diagnostic.kind,
+                                span: content_start + expr_start + diagnostic.span.start
+                                    ..content_start + expr_start + diagnostic.span.end,
+                            });
+                        }
+                    }
+                }
+            } else if char == '}' {
+                if chars.peek().is_some_and(|(_, char)| char == &'}') {
+                    chars.next();
+                    if literal.is_empty() {
+                        literal_start = index;
+                    }
+                    literal.push('}');
+                } else {
+                    return Err(Diagnostic {
+                        kind: DiagnosticKind::UnrecognizedToken(
+                            Token::RBrace,
+                            vec!["}}".to_string()],
+                        ),
+                        span: content_start + index..content_start + index + char.len_utf8(),
+                    });
+                }
+            } else {
+                if literal.is_empty() {
+                    literal_start = index;
+                }
+                literal.push(char);
+            }
+        }
+        if !literal.is_empty() {
+            Self::append_join(&mut tokens);
+            tokens.push((
+                content_start + literal_start,
+                Token::Str(literal.into()),
+                span.end.saturating_sub(1),
+            ));
+        }
+        if tokens.is_empty() {
+            tokens.push((span.start, Token::Str("".into()), span.end));
+        }
+        Ok(tokens)
+    }
+
+    /// Appends a zero-width `&` token after the last token; a no-op when `tokens` is empty.
+    fn append_join(tokens: &mut Vec<(usize, Token, usize)>) {
+        if !tokens.is_empty() {
+            let span = tokens.last().map_or(0..0, get_span);
+            tokens.push((span.end, Token::Amp, span.end));
+        }
+    }
 }
 
 #[cfg(test)]
@@ -670,4 +847,102 @@ mod tests {
         assert!(result.is_err());
         assert!(!tokens.is_empty());
     }
+
+    #[test]
+    fn fmt_interpolates_expression_between_literals() {
+        let tokens = preprocess(r#"f"({x})""#).unwrap();
+
+        assert_eq!(
+            tokens,
+            vec![
+                Token::Str("(".into()),
+                Token::Amp,
+                Token::Name("x".into()),
+                Token::Amp,
+                Token::Str(")".into())
+            ]
+        );
+    }
+
+    #[test]
+    fn fmt_interpolates_adjacent_expressions() {
+        let tokens = preprocess(r#"f"{x}{y}""#).unwrap();
+
+        assert_eq!(
+            tokens,
+            vec![Token::Name("x".into()), Token::Amp, Token::Name("y".into())]
+        );
+    }
+
+    #[test]
+    fn fmt_joins_literal_between_expressions() {
+        let tokens = preprocess(r#"f"{x}, {y}""#).unwrap();
+
+        assert_eq!(
+            tokens,
+            vec![
+                Token::Name("x".into()),
+                Token::Amp,
+                Token::Str(", ".into()),
+                Token::Amp,
+                Token::Name("y".into())
+            ]
+        );
+    }
+
+    #[test]
+    fn fmt_tokenizes_expression_contents() {
+        let tokens = preprocess(r#"f"x = {x + 1}""#).unwrap();
+
+        assert_eq!(
+            tokens,
+            vec![
+                Token::Str("x = ".into()),
+                Token::Amp,
+                Token::Name("x".into()),
+                Token::Plus,
+                Token::Int(1)
+            ]
+        );
+    }
+
+    #[test]
+    fn fmt_tokenizes_nested_fstrings() {
+        let tokens = preprocess(r#"f"{1+f\"{{x}}\"}""#).unwrap();
+
+        assert_eq!(
+            tokens,
+            vec![Token::Int(1), Token::Plus, Token::Name("x".into())]
+        );
+    }
+
+    #[test]
+    fn fmt_escapes_literal_braces() {
+        let tokens = preprocess(r#"f"{{{x}}}""#).unwrap();
+
+        assert_eq!(
+            tokens,
+            vec![
+                Token::Str("{".into()),
+                Token::Amp,
+                Token::Name("x".into()),
+                Token::Amp,
+                Token::Str("}".into())
+            ]
+        );
+    }
+
+    #[test]
+    fn fmt_expands_empty_string() {
+        let tokens = preprocess(r#"f"""#).unwrap();
+
+        assert_eq!(tokens, vec![Token::Str("".into())]);
+    }
+
+    #[test]
+    fn fmt_errors_for_unterminated_interpolation() {
+        let result = preprocess(r#"f"{x""#);
+
+        assert!(result.is_err());
+    }
 }
diff --git a/tools/sb3.py b/tools/sb3.py
index 85d72b1..0ba0a17 100755
--- a/tools/sb3.py
+++ b/tools/sb3.py
@@ -64,10 +64,11 @@
     subprocess.run(["delta", *extra, "--side-by-side", *pathids])
 
 if args.validate:
-    sb3ts = Path(__file__).parent.joinpath("sb3.ts")
-    tools = Path(__file__).parent
+    sb3ts = Path(__file__).parent.joinpath("sb3.ts").resolve()
+    tools = Path(__file__).parent.resolve()
+    tsx = tools.joinpath("node_modules/.bin/tsx")
     for pathid in pathids:
         if returncode := subprocess.run(
-            ["pnpm", "--dir", tools, "exec", "tsx", sb3ts, pathid]
+            [tsx, sb3ts, pathid.resolve()]
         ).returncode:
             sys.exit(returncode)
From 7600838cf1d38d638a4f07ba8735858e3d805b9c Mon Sep 17 00:00:00 2001
From: Priyanshu Dangare
Date: Fri, 12 Jun 2026 01:32:17 +0530
Subject: [PATCH 2/2] Document f-strings

---
 docs/language/macros.md | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/docs/language/macros.md b/docs/language/macros.md
index 20e73b3..65095b1 100644
--- a/docs/language/macros.md
+++ b/docs/language/macros.md
@@ -137,3 +137,32 @@ Nested parentheses are supported and are included verbatim in the resulting stri
 ```goboscript
 STRINGIFY(foo(bar, baz)) # becomes "foo ( bar , baz )"
 ```
+
+## Format Strings
+
+Format strings interpolate expressions into strings at compile time. Prefix a string
+literal with `f` and write expressions inside `{}`. The preprocessor expands the string
+into an expression joined with `&`.
+
+```goboscript
+say f"Hello, {name}!"; # becomes "Hello, " & name & "!"
+```
+
+Any goboscript expression can be used inside `{}`:
+
+```goboscript
+say f"Score: {score + bonus}";
+say f"Position: ({x}, {y})";
+```
+
+Use `{{` and `}}` to include literal braces:
+
+```goboscript
+say f"{{{name}}}"; # becomes "{" & name & "}"
+```
+
+Nested format strings must escape the braces that belong to the inner string:
+
+```goboscript
+say f"{1 + f\"{{name}}\"}";
+```