diff --git a/Cargo.lock b/Cargo.lock index 73c35e9d..919907cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -911,6 +911,8 @@ dependencies = [ "thiserror 2.0.18", "tokio", "tracing", + "tree-sitter", + "tree-sitter-cpp", "walkdir", "zip", ] @@ -2968,6 +2970,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ + "indexmap", "itoa", "memchr", "serde", @@ -3148,6 +3151,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ -3627,6 +3636,36 @@ dependencies = [ "syn", ] +[[package]] +name = "tree-sitter" +version = "0.26.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dab76d0b724ba557954125188cf0633a1ca43199ced82d95c7b9c32cc3de1f3" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + [[package]] name = "try-lock" version = "0.2.5" diff --git a/Cargo.toml b/Cargo.toml index 7a272e1f..08f45315 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,6 +53,8 @@ filetime = "0.2" sha2 = "0.10" regex = "1" walkdir = "2" +tree-sitter = "0.26.9" +tree-sitter-cpp = "0.23.4" flate2 = "1" tar = "0.4" zip = { version = "2", default-features = false, features = ["deflate"] } diff --git a/crates/fbuild-build/Cargo.toml b/crates/fbuild-build/Cargo.toml index 4ceed086..70b0263d 100644 --- a/crates/fbuild-build/Cargo.toml +++ b/crates/fbuild-build/Cargo.toml @@ -29,6 +29,8 @@ reqwest = { workspace = true } flate2 = { workspace = true } tar = { workspace = true } zip = { workspace = true } +tree-sitter = { workspace = true } +tree-sitter-cpp = { workspace = true } [dev-dependencies] filetime = { workspace = true } diff --git a/crates/fbuild-build/src/source_scanner.rs b/crates/fbuild-build/src/source_scanner.rs index fc52934d..c3ecf460 100644 --- a/crates/fbuild-build/src/source_scanner.rs +++ b/crates/fbuild-build/src/source_scanner.rs @@ -8,6 +8,7 @@ use regex::Regex; use std::cmp::Ordering; use std::collections::HashSet; use std::path::{Path, PathBuf}; +use tree_sitter::{Node, Parser}; use walkdir::WalkDir; /// Collection of source files found by the scanner. @@ -575,56 +576,270 @@ fn walk_sources(dir: &Path) -> Vec { files } -/// Extract function prototypes from concatenated .ino source. -/// -/// Finds function definitions and generates forward declarations. +/// Extract function prototypes from concatenated .ino source using a C++ parser. pub fn extract_function_prototypes(source: &str) -> Vec { - let func_re = - Regex::new(r"(?m)^([a-zA-Z_][\w\s\*&:<>,]*?)\s+([a-zA-Z_]\w*)\s*\(([^)]*)\)\s*\{").unwrap(); - - // Keywords that look like function definitions but aren't - let skip_keywords: HashSet<&str> = ["if", "while", "for", "switch", "catch", "else"] - .iter() - .copied() - .collect(); + let Some(tree) = parse_cpp_source(source) else { + return Vec::new(); + }; - let mut prototypes = Vec::new(); + let mut raw_prototypes = Vec::new(); + collect_function_prototypes(tree.root_node(), source, &mut raw_prototypes); let mut seen = HashSet::new(); + raw_prototypes + .into_iter() + .filter(|proto| seen.insert(proto.clone())) + .collect() +} + +/// Find existing forward declarations in source. +fn find_existing_forward_declarations(source: &str) -> Vec { + let Some(tree) = parse_cpp_source(source) else { + return Vec::new(); + }; - for cap in func_re.captures_iter(source) { - let return_type = cap[1].trim(); - let func_name = &cap[2]; - let params = cap[3].trim(); + let mut declarations = Vec::new(); + collect_forward_declarations(tree.root_node(), source, &mut declarations); + declarations +} - if skip_keywords.contains(func_name) { - continue; +fn parse_cpp_source(source: &str) -> Option { + let mut parser = Parser::new(); + let language = tree_sitter_cpp::LANGUAGE.into(); + parser.set_language(&language).ok()?; + parser.parse(source, None) +} + +fn collect_function_prototypes(node: Node<'_>, source: &str, prototypes: &mut Vec) { + if node.kind() == "function_definition" { + if let Some(prototype) = prototype_from_function_definition(node, source) { + prototypes.push(prototype); } + return; + } - // Skip if it looks like a macro or class method - if return_type.contains('#') || return_type.contains("::") { - continue; + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_function_prototypes(child, source, prototypes); + } +} + +fn prototype_from_function_definition(node: Node<'_>, source: &str) -> Option { + if has_skipped_function_context(node) { + return None; + } + + let signature_node = node + .parent() + .filter(|parent| parent.kind() == "template_declaration") + .unwrap_or(node); + let body = node.child_by_field_name("body")?; + let signature_start = signature_node.start_byte(); + let signature = source.get(signature_start..body.start_byte())?; + let parameter_list = find_descendant_kind(node, "parameter_list")?; + let params_start = parameter_list.start_byte().checked_sub(signature_start)?; + let params_end = parameter_list.end_byte().checked_sub(signature_start)?; + let signature = strip_default_arguments(signature, params_start, params_end); + let signature = normalize_signature(&signature)?; + + if signature.contains("::") || signature.starts_with('#') { + return None; + } + + Some(signature) +} + +fn has_skipped_function_context(node: Node<'_>) -> bool { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "namespace_definition" + | "class_specifier" + | "struct_specifier" + | "union_specifier" + | "field_declaration_list" => return true, + _ => current = parent.parent(), } + } + false +} - let proto = format!("{} {}({})", return_type, func_name, params); - if seen.insert(proto.clone()) { - prototypes.push(proto); +fn collect_forward_declarations(node: Node<'_>, source: &str, declarations: &mut Vec) { + if node.kind() == "declaration" + && !has_skipped_function_context(node) + && has_descendant_kind(node, "function_declarator") + { + if let Some(text) = source.get(node.start_byte()..node.end_byte()) { + let declaration = text.trim(); + if declaration.ends_with(';') && !declaration.contains("::") { + declarations.push(declaration.to_string()); + } } + return; } - prototypes + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + collect_forward_declarations(child, source, declarations); + } } -/// Find existing forward declarations in source (lines ending with `);` -/// that look like function prototypes). -fn find_existing_forward_declarations(source: &str) -> Vec { - let decl_re = - Regex::new(r"(?m)^([a-zA-Z_][\w\s\*&:<>,]*?)\s+([a-zA-Z_]\w*)\s*\([^)]*\)\s*;\s*$") - .unwrap(); +fn has_descendant_kind(node: Node<'_>, kind: &str) -> bool { + find_descendant_kind(node, kind).is_some() +} - decl_re - .find_iter(source) - .map(|m| m.as_str().to_string()) - .collect() +fn find_descendant_kind<'a>(node: Node<'a>, kind: &str) -> Option> { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == kind { + return Some(child); + } + if let Some(found) = find_descendant_kind(child, kind) { + return Some(found); + } + } + None +} + +fn normalize_signature(signature: &str) -> Option { + let lines: Vec<&str> = signature + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + .collect(); + if lines.is_empty() { + return None; + } + Some(lines.join(" ")) +} + +fn strip_default_arguments(signature: &str, params_start: usize, params_end: usize) -> String { + let Some(params) = signature.get(params_start..params_end) else { + return signature.to_string(); + }; + let Some(params_inner) = params.strip_prefix('(').and_then(|p| p.strip_suffix(')')) else { + return signature.to_string(); + }; + + let mut output = String::new(); + output.push_str(&signature[..params_start + 1]); + output.push_str(&strip_defaults_from_params(params_inner)); + output.push_str(&signature[params_end - 1..]); + output +} + +fn strip_defaults_from_params(params: &str) -> String { + let mut output = String::new(); + let mut skip_default = false; + let mut paren_depth = 0usize; + let mut bracket_depth = 0usize; + let mut brace_depth = 0usize; + let mut angle_depth = 0usize; + let mut quote: Option = None; + let mut escaped = false; + + for ch in params.chars() { + if let Some(quote_char) = quote { + if !skip_default { + output.push(ch); + } + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote_char { + quote = None; + } + continue; + } + + match ch { + '"' | '\'' => { + if !skip_default { + output.push(ch); + } + quote = Some(ch); + } + '(' => { + paren_depth += 1; + if !skip_default { + output.push(ch); + } + } + ')' => { + paren_depth = paren_depth.saturating_sub(1); + if !skip_default { + output.push(ch); + } + } + '[' => { + bracket_depth += 1; + if !skip_default { + output.push(ch); + } + } + ']' => { + bracket_depth = bracket_depth.saturating_sub(1); + if !skip_default { + output.push(ch); + } + } + '{' => { + brace_depth += 1; + if !skip_default { + output.push(ch); + } + } + '}' => { + brace_depth = brace_depth.saturating_sub(1); + if !skip_default { + output.push(ch); + } + } + '<' => { + angle_depth += 1; + if !skip_default { + output.push(ch); + } + } + '>' => { + angle_depth = angle_depth.saturating_sub(1); + if !skip_default { + output.push(ch); + } + } + '=' if paren_depth == 0 + && bracket_depth == 0 + && brace_depth == 0 + && angle_depth == 0 => + { + skip_default = true; + trim_trailing_spaces(&mut output); + } + ',' if paren_depth == 0 + && bracket_depth == 0 + && brace_depth == 0 + && angle_depth == 0 => + { + skip_default = false; + trim_trailing_spaces(&mut output); + output.push(ch); + } + _ => { + if !skip_default { + output.push(ch); + } + } + } + } + + trim_trailing_spaces(&mut output); + output +} + +fn trim_trailing_spaces(text: &mut String) { + while text.chars().last().is_some_and(char::is_whitespace) { + text.pop(); + } } #[cfg(test)] @@ -912,11 +1127,61 @@ mod tests { } #[test] - fn test_prototype_extraction_skips_keywords() { - let source = "void setup() {\n if (true) {\n }\n while (false) {\n }\n}\n"; + fn test_prototype_extraction_handles_complex_cpp_signatures() { + let source = r#" +template +T twice(T value) { + return value + value; +} + +[[nodiscard]] const char* label(const char* fallback = "demo") { + return fallback; +} + +int& ref_value(int& value) { + return value; +} +"#; + let protos = extract_function_prototypes(source); + assert!(protos.contains(&"template T twice(T value)".to_string())); + assert!( + protos.contains(&"[[nodiscard]] const char* label(const char* fallback)".to_string()) + ); + assert!(protos.contains(&"int& ref_value(int& value)".to_string())); + assert!(!protos.iter().any(|p| p.contains("= \"demo\""))); + } + + #[test] + fn test_prototype_extraction_skips_non_free_functions() { + let source = r#" +#define MAKE_FUNC(name) void name() {} + +void setup() { + if (true) { + } + while (false) { + } + auto callback = []() { return 1; }; +} + +class Controller { + void tick() {} +}; + +namespace hidden { +void helper() {} +} + +void Controller::external_tick() {} +"#; let protos = extract_function_prototypes(source); + assert!(protos.iter().any(|p| p == "void setup()")); assert!(!protos.iter().any(|p| p.contains("if"))); assert!(!protos.iter().any(|p| p.contains("while"))); + assert!(!protos.iter().any(|p| p.contains("callback"))); + assert!(!protos.iter().any(|p| p.contains("tick"))); + assert!(!protos.iter().any(|p| p.contains("helper"))); + assert!(!protos.iter().any(|p| p.contains("MAKE_FUNC"))); } #[test] diff --git a/docs/reference/platformio-compatibility.md b/docs/reference/platformio-compatibility.md index b12400be..1760ef26 100644 --- a/docs/reference/platformio-compatibility.md +++ b/docs/reference/platformio-compatibility.md @@ -92,3 +92,16 @@ References: - PlatformIO `InoToCPPConverter`: + +## `.ino` Prototype Generation + +Arduino-style `.ino` preprocessing inserts forward declarations for free +functions so sketches can call functions before their definitions. fbuild uses +an embedded tree-sitter C++ parser for this step instead of regex matching. + +This keeps prototype generation structural enough to avoid control-flow blocks, +lambdas, class methods, macro fragments, scoped `Class::method` definitions, and +namespace-local definitions. It also handles common C++ signature syntax such as +templates, attributes, references, and default arguments. + +No clang or libclang runtime install is required for this preprocessing path.