// roup/ir/expression.rs
//! Expression representation and optional parsing
//!
//! This module provides flexible expression handling:
//! - **Default**: Attempt to parse expressions into structured AST
//! - **Fallback**: Keep expressions as raw strings when parsing fails
//! - **Configurable**: Can disable parsing entirely via ParserConfig
//!
//! ## Learning Objectives
//!
//! - **Enums for alternatives**: Expression can be Parsed OR Unparsed
//! - **Recursive structures**: ExpressionAst contains nested expressions
//! - **Configuration patterns**: ParserConfig controls behavior
//! - **Graceful degradation**: Complex expressions fall back to strings
//! - **Box for indirection**: Breaking recursive type cycles
//!
//! ## Design Philosophy
//!
//! The parser supports C, C++, and Fortran - languages with very different
//! expression syntax. Rather than trying to perfectly parse all expressions,
//! we take a pragmatic approach:
//!
//! 1. Parse common simple patterns (literals, identifiers, binary ops)
//! 2. Fall back to string representation for complex expressions
//! 3. Always preserve the original source text
//! 4. Let the consuming compiler handle language-specific parsing
//!
//! This makes the IR **useful immediately** while allowing incremental
//! improvement of expression parsing over time.
29
30use std::fmt;
31
32use super::Language;
33
// ============================================================================
// Parser Configuration
// ============================================================================
37
38/// Configuration for IR generation and expression parsing
39///
40/// This controls how the parser converts syntax to IR, particularly
41/// how it handles expressions.
42///
43/// ## Learning: Configuration Pattern
44///
45/// Rather than using global state or command-line flags, we pass
46/// configuration explicitly. This makes the code:
47/// - **Testable**: Easy to test with different configs
48/// - **Composable**: Multiple parsers with different configs
49/// - **Thread-safe**: No global mutable state
50///
51/// ## Example
52///
53/// ```
54/// use roup::ir::{ParserConfig, Language};
55///
56/// // Default: parse expressions
57/// let default_config = ParserConfig::default();
58/// assert!(default_config.parse_expressions);
59///
60/// // Custom: disable expression parsing
61/// let string_only = ParserConfig::string_only(Language::C);
62/// ```
63#[derive(Debug, Clone, Copy, PartialEq, Eq)]
64pub struct ParserConfig {
65 /// Whether to attempt parsing expressions into structured form
66 ///
67 /// - `true` (default): Parse expressions, fall back to string on failure
68 /// - `false`: Keep all expressions as raw strings
69 pub parse_expressions: bool,
70
71 /// Source language (affects expression parsing rules)
72 ///
73 /// Different languages have different expression syntax:
74 /// - C/C++: `arr[i]`, `*ptr`, `x->y`
75 /// - Fortran: `arr(i)`, different operators
76 language: Language,
77
78 /// Whether to enable language-aware semantic parsing for clause items.
79 language_semantics: bool,
80}
81
82impl ParserConfig {
83 /// Create a new configuration
84 pub const fn new(parse_expressions: bool, language: Language) -> Self {
85 Self {
86 parse_expressions,
87 language,
88 language_semantics: true,
89 }
90 }
91
92 /// Create config that keeps all expressions as strings
93 pub const fn string_only(language: Language) -> Self {
94 Self::new(false, language)
95 }
96
97 /// Create config that parses expressions
98 pub const fn with_parsing(language: Language) -> Self {
99 Self::new(true, language)
100 }
101
102 /// Override the language for this configuration.
103 pub const fn with_language(mut self, language: Language) -> Self {
104 self.language = language;
105 self
106 }
107
108 /// Enable or disable language semantics for clause parsing.
109 pub const fn with_language_semantics(mut self, enabled: bool) -> Self {
110 self.language_semantics = enabled;
111 self
112 }
113
114 /// Return a copy of this configuration using a specific language.
115 pub const fn for_language(&self, language: Language) -> Self {
116 Self {
117 parse_expressions: self.parse_expressions,
118 language,
119 language_semantics: self.language_semantics,
120 }
121 }
122
123 /// Get the configured language.
124 pub const fn language(&self) -> Language {
125 self.language
126 }
127
128 /// Whether language semantics are enabled.
129 pub const fn language_semantics_enabled(&self) -> bool {
130 self.language_semantics
131 }
132}
133
134impl Default for ParserConfig {
135 /// Default: parse expressions, unknown language
136 fn default() -> Self {
137 Self {
138 parse_expressions: true,
139 language: Language::Unknown,
140 language_semantics: true,
141 }
142 }
143}
144
// ============================================================================
// Expression Types
// ============================================================================
148
149/// An expression that may be parsed or unparsed
150///
151/// This is the core type for representing expressions in the IR.
152/// It gracefully handles both structured and unstructured forms.
153///
154/// ## Learning: Enums for Polymorphism
155///
156/// Instead of inheritance (like in C++), Rust uses enums to represent
157/// "one of several types". This is more explicit and type-safe.
158///
159/// ## Learning: Box for Recursion
160///
161/// The `Parsed` variant contains `Box<ExpressionAst>` instead of
162/// `ExpressionAst` directly. Why? Because `ExpressionAst` itself
163/// contains `Expression` values (recursion!).
164///
165/// Without `Box`, the type would have infinite size. `Box` provides
166/// indirection through a heap pointer, breaking the cycle.
167///
168/// ## Example
169///
170/// ```
171/// use roup::ir::{Expression, ParserConfig};
172///
173/// let config = ParserConfig::default();
174///
175/// // Simple expression gets parsed
176/// let simple = Expression::new("42", &config);
177/// assert!(simple.is_parsed());
178///
179/// // Complex expression falls back to string
180/// let complex = Expression::new("sizeof(struct foo)", &config);
181/// // May or may not be parsed depending on parser capability
182///
183/// // With parsing disabled, always unparsed
184/// let config_no_parse = ParserConfig::string_only(roup::ir::Language::C);
185/// let expr = Expression::new("N * 2", &config_no_parse);
186/// assert!(!expr.is_parsed());
187/// assert_eq!(expr.as_str(), "N * 2");
188/// ```
189#[derive(Debug, Clone, PartialEq)]
190pub enum Expression {
191 /// Expression was successfully parsed into structured form
192 ///
193 /// The compiler can analyze the AST structure for optimization,
194 /// validation, or transformation.
195 Parsed(Box<ExpressionAst>),
196
197 /// Expression kept as raw string
198 ///
199 /// This happens when:
200 /// - Expression parsing is disabled
201 /// - Expression is too complex for the parser
202 /// - Parser doesn't support this language construct yet
203 ///
204 /// The compiler must parse this string according to the source language.
205 Unparsed(String),
206}
207
208impl Expression {
209 /// Create a new expression, attempting to parse if enabled
210 ///
211 /// ## Example
212 ///
213 /// ```
214 /// use roup::ir::{Expression, ParserConfig, Language};
215 ///
216 /// let config = ParserConfig::default();
217 /// let expr = Expression::new("100", &config);
218 /// assert_eq!(expr.as_str(), "100");
219 /// ```
220 pub fn new(raw: impl Into<String>, config: &ParserConfig) -> Self {
221 let raw = raw.into();
222 let trimmed = raw.trim().to_string();
223
224 // If parsing disabled, return unparsed
225 if !config.parse_expressions {
226 return Expression::Unparsed(trimmed);
227 }
228
229 // Try to parse based on language
230 match parse_expression(&trimmed, config.language()) {
231 Ok(ast) => Expression::Parsed(Box::new(ast)),
232 Err(_) => Expression::Unparsed(trimmed),
233 }
234 }
235
236 /// Create an unparsed expression directly
237 ///
238 /// Useful when you know parsing will fail or you want to bypass it.
239 pub fn unparsed(raw: impl Into<String>) -> Self {
240 Expression::Unparsed(raw.into())
241 }
242
243 /// Get the raw string representation
244 ///
245 /// This always works, whether the expression is parsed or not.
246 /// The original source is always preserved.
247 pub fn as_str(&self) -> &str {
248 match self {
249 Expression::Parsed(ast) => &ast.original_source,
250 Expression::Unparsed(s) => s,
251 }
252 }
253
254 /// Check if expression was successfully parsed
255 pub const fn is_parsed(&self) -> bool {
256 matches!(self, Expression::Parsed(_))
257 }
258
259 /// Get the parsed AST if available
260 pub fn as_ast(&self) -> Option<&ExpressionAst> {
261 match self {
262 Expression::Parsed(ast) => Some(ast),
263 Expression::Unparsed(_) => None,
264 }
265 }
266}
267
268impl fmt::Display for Expression {
269 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
270 write!(f, "{}", self.as_str())
271 }
272}
273
// ============================================================================
// Expression AST (Structured Representation)
// ============================================================================
277
278/// Parsed expression abstract syntax tree
279///
280/// This represents common expression patterns found in OpenMP directives.
281/// It's **not** a complete C/C++/Fortran parser, just enough to handle
282/// typical OpenMP expressions.
283///
284/// ## Learning: Recursive Data Structures
285///
286/// Notice that `ExpressionKind` contains `Box<ExpressionAst>` in several
287/// variants. This allows representing nested expressions like:
288/// - `(a + b) * c` - BinaryOp containing another BinaryOp
289/// - `arr[i][j]` - ArrayAccess containing another ArrayAccess
290///
291/// ## Example
292///
293/// ```
294/// use roup::ir::{Expression, ParserConfig};
295///
296/// let config = ParserConfig::default();
297/// let expr = Expression::new("42", &config);
298///
299/// if let Some(ast) = expr.as_ast() {
300/// // Can inspect the AST structure
301/// println!("Original: {}", ast.original_source);
302/// }
303/// ```
304#[derive(Debug, Clone, PartialEq)]
305pub struct ExpressionAst {
306 /// Original source text (always preserved)
307 pub original_source: String,
308
309 /// Parsed structure (best-effort)
310 pub kind: ExpressionKind,
311}
312
313/// Common expression patterns in OpenMP directives
314///
315/// ## Learning: Large Enums with Data
316///
317/// This enum demonstrates Rust's powerful enum system. Each variant
318/// can carry different data:
319/// - `IntLiteral(i64)` - carries an integer
320/// - `Identifier(String)` - carries an owned string
321/// - `BinaryOp { ... }` - carries multiple fields
322///
323/// This is much more powerful than C enums, which can only be simple tags.
324#[derive(Debug, Clone, PartialEq)]
325pub enum ExpressionKind {
326 /// Integer literal: `42`, `0x10`, `0b1010`
327 IntLiteral(i64),
328
329 /// Identifier: `N`, `num_threads`, `my_var`
330 Identifier(String),
331
332 /// Binary operation: `a + b`, `N * 2`, `i < 10`
333 BinaryOp {
334 left: Box<ExpressionAst>,
335 op: BinaryOperator,
336 right: Box<ExpressionAst>,
337 },
338
339 /// Unary operation: `-x`, `!flag`, `*ptr`
340 UnaryOp {
341 op: UnaryOperator,
342 operand: Box<ExpressionAst>,
343 },
344
345 /// Function call: `foo(a, b)`, `omp_get_num_threads()`
346 Call {
347 function: String,
348 args: Vec<ExpressionAst>,
349 },
350
351 /// Array subscript: `arr[i]`, `matrix[i][j]`
352 ArrayAccess {
353 array: Box<ExpressionAst>,
354 indices: Vec<ExpressionAst>,
355 },
356
357 /// Ternary conditional: `cond ? a : b`
358 Conditional {
359 condition: Box<ExpressionAst>,
360 then_expr: Box<ExpressionAst>,
361 else_expr: Box<ExpressionAst>,
362 },
363
364 /// Parenthesized: `(expr)`
365 Parenthesized(Box<ExpressionAst>),
366
367 /// Too complex to parse, kept as string
368 ///
369 /// This is our escape hatch for expressions that are valid
370 /// but not yet supported by the parser.
371 Complex(String),
372}
373
/// Operators that take two operands.
///
/// ## Learning: repr(C) for C Interop
///
/// The enum is marked `#[repr(C)]` and every variant is given an explicit
/// discriminant, so the numeric value of each operator is fixed in the
/// source rather than assigned implicitly. The values are grouped in
/// decades by category.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub enum BinaryOperator {
    // Arithmetic operators: 0..=4
    Add = 0,
    Sub = 1,
    Mul = 2,
    Div = 3,
    Mod = 4,

    // Comparison operators: 10..=15
    Eq = 10,
    Ne = 11,
    Lt = 12,
    Le = 13,
    Gt = 14,
    Ge = 15,

    // Logical operators: 20..=21
    And = 20,
    Or = 21,

    // Bitwise operators: 30..=34
    BitwiseAnd = 30,
    BitwiseOr = 31,
    BitwiseXor = 32,
    ShiftLeft = 33,
    ShiftRight = 34,
}
409
/// Operators that take a single operand.
///
/// Like [`BinaryOperator`], the enum is `#[repr(C)]` with explicit
/// discriminants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub enum UnaryOperator {
    /// Arithmetic negation: `-x`
    Negate = 0,
    /// Logical negation: `!x`
    LogicalNot = 1,
    /// Bitwise complement: `~x`
    BitwiseNot = 2,
    /// Pointer dereference: `*ptr` (C/C++)
    Deref = 3,
    /// Address-of: `&var` (C/C++)
    AddressOf = 4,
}
420
// ============================================================================
// Expression Parser (Isolated, Configurable)
// ============================================================================
424
/// Error produced when an expression cannot be parsed.
///
/// Besides exposing the message as a public field, the type implements
/// [`fmt::Display`] (printing the message as-is) and
/// [`std::error::Error`], so it can be shown with `{}` and propagated with
/// `?` into `Box<dyn std::error::Error>` like any other error.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Writes the error message without any decoration.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.message)
    }
}

// No `source()` override: a `ParseError` does not wrap another error.
impl std::error::Error for ParseError {}
430
431/// Parse an expression string into an AST
432///
433/// This is **isolated** and can be disabled via config.
434/// Returns `Err` if expression is too complex or language-specific.
435///
436/// ## Learning: Error Handling with Result
437///
438/// Rust doesn't have exceptions. Instead, functions that can fail
439/// return `Result<T, E>`:
440/// - `Ok(value)` - success
441/// - `Err(error)` - failure
442///
443/// The caller must handle both cases (checked at compile time!).
444fn parse_expression(input: &str, language: Language) -> Result<ExpressionAst, ParseError> {
445 match language {
446 Language::C | Language::Cpp => parse_c_expression(input),
447 Language::Fortran => parse_fortran_expression(input),
448 Language::Unknown => parse_generic_expression(input),
449 }
450}
451
452/// Parse C/C++ expression
453///
454/// Currently falls back to generic parser. In the future, this could
455/// handle C/C++-specific constructs like `->`, `sizeof`, etc.
456fn parse_c_expression(input: &str) -> Result<ExpressionAst, ParseError> {
457 parse_generic_expression(input)
458}
459
460/// Parse Fortran expression
461///
462/// Currently falls back to generic parser. In the future, this could
463/// handle Fortran-specific constructs.
464fn parse_fortran_expression(input: &str) -> Result<ExpressionAst, ParseError> {
465 parse_generic_expression(input)
466}
467
468/// Parse simple, language-agnostic expressions
469///
470/// This handles the most common patterns:
471/// - Integer literals: `42`
472/// - Identifiers: `N`, `my_var`
473/// - Everything else: marked as `Complex`
474///
475/// This is intentionally simple. Complex parsing can be added later
476/// without changing the IR structure.
477fn parse_generic_expression(input: &str) -> Result<ExpressionAst, ParseError> {
478 let trimmed = input.trim();
479
480 // Try to parse as integer literal
481 if let Ok(value) = trimmed.parse::<i64>() {
482 return Ok(ExpressionAst {
483 original_source: input.to_string(),
484 kind: ExpressionKind::IntLiteral(value),
485 });
486 }
487
488 // Try to parse as identifier
489 if is_simple_identifier(trimmed) {
490 return Ok(ExpressionAst {
491 original_source: input.to_string(),
492 kind: ExpressionKind::Identifier(trimmed.to_string()),
493 });
494 }
495
496 // For everything else, mark as complex
497 // The consuming compiler will parse it
498 Ok(ExpressionAst {
499 original_source: input.to_string(),
500 kind: ExpressionKind::Complex(trimmed.to_string()),
501 })
502}
503
/// Report whether `s` has the shape of a plain identifier.
///
/// The string qualifies when:
/// - it is non-empty,
/// - its first character is a letter or `_`, and
/// - every following character is a letter, a digit, or `_`.
///
/// "Letter" and "digit" follow `char::is_alphabetic` /
/// `char::is_alphanumeric`, so non-ASCII letters are accepted as well.
fn is_simple_identifier(s: &str) -> bool {
    let mut rest = s.chars();

    match rest.next() {
        // Valid leading character: the remainder decides.
        Some(head) if head == '_' || head.is_alphabetic() => {
            rest.all(|c| c == '_' || c.is_alphanumeric())
        }
        // Empty string, or an invalid leading character.
        _ => false,
    }
}
525
// ============================================================================
// Tests
// ============================================================================
529
/// Unit tests for the configuration type, the expression wrapper, the
/// generic parser, and the operator enums.
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------------
    // ParserConfig tests
    // ------------------------------------------------------------------------

    #[test]
    fn parser_config_default_enables_parsing() {
        let config = ParserConfig::default();
        assert!(config.parse_expressions);
        assert_eq!(config.language(), Language::Unknown);
    }

    #[test]
    fn parser_config_string_only_disables_parsing() {
        let config = ParserConfig::string_only(Language::C);
        assert!(!config.parse_expressions);
        assert_eq!(config.language(), Language::C);
    }

    #[test]
    fn parser_config_with_parsing_enables_parsing() {
        let config = ParserConfig::with_parsing(Language::Fortran);
        assert!(config.parse_expressions);
        assert_eq!(config.language(), Language::Fortran);
    }

    #[test]
    fn parser_config_builders_override_single_fields() {
        let base = ParserConfig::default();
        assert!(base.language_semantics_enabled());

        let no_semantics = base.with_language_semantics(false);
        assert!(!no_semantics.language_semantics_enabled());
        assert!(no_semantics.parse_expressions);

        let cpp = no_semantics.with_language(Language::Cpp);
        assert_eq!(cpp.language(), Language::Cpp);
        assert!(!cpp.language_semantics_enabled());

        // `for_language` borrows and leaves the receiver untouched.
        let fortran = cpp.for_language(Language::Fortran);
        assert_eq!(fortran.language(), Language::Fortran);
        assert_eq!(cpp.language(), Language::Cpp);
        assert_eq!(fortran.parse_expressions, cpp.parse_expressions);
    }

    // ------------------------------------------------------------------------
    // Expression tests
    // ------------------------------------------------------------------------

    #[test]
    fn expression_new_parses_integer_literal() {
        let config = ParserConfig::default();
        let expr = Expression::new("42", &config);

        assert!(expr.is_parsed());
        assert_eq!(expr.as_str(), "42");

        let ast = expr.as_ast().expect("Should be parsed");
        assert!(matches!(ast.kind, ExpressionKind::IntLiteral(42)));
    }

    #[test]
    fn expression_new_parses_identifier() {
        let config = ParserConfig::default();
        let expr = Expression::new("my_var", &config);

        assert!(expr.is_parsed());
        assert_eq!(expr.as_str(), "my_var");

        // `expect` keeps the test from passing vacuously when no AST exists.
        let ast = expr.as_ast().expect("Should be parsed");
        match &ast.kind {
            ExpressionKind::Identifier(name) => assert_eq!(name, "my_var"),
            other => panic!("Should be identifier, got {other:?}"),
        }
    }

    #[test]
    fn expression_new_handles_complex_as_complex() {
        let config = ParserConfig::default();
        let expr = Expression::new("a + b * c", &config);

        // Should parse, but only as the Complex kind.
        assert!(expr.is_parsed());
        assert_eq!(expr.as_str(), "a + b * c");

        let ast = expr.as_ast().expect("Should be parsed");
        assert!(matches!(ast.kind, ExpressionKind::Complex(_)));
    }

    #[test]
    fn expression_with_parsing_disabled_stays_unparsed() {
        let config = ParserConfig::string_only(Language::C);
        let expr = Expression::new("42", &config);

        assert!(!expr.is_parsed());
        assert_eq!(expr.as_str(), "42");
        assert!(expr.as_ast().is_none());
    }

    #[test]
    fn expression_unparsed_creates_unparsed() {
        let expr = Expression::unparsed("anything");

        assert!(!expr.is_parsed());
        assert_eq!(expr.as_str(), "anything");
    }

    #[test]
    fn expression_preserves_original_source() {
        let config = ParserConfig::default();
        let expr = Expression::new(" 42 ", &config);

        // `Expression::new` trims before parsing, so the preserved source
        // is the trimmed text.
        assert_eq!(expr.as_str(), "42");
    }

    #[test]
    fn expression_display_shows_source() {
        let expr = Expression::unparsed("N * 2");
        assert_eq!(format!("{expr}"), "N * 2");
    }

    // ------------------------------------------------------------------------
    // ExpressionAst tests
    // ------------------------------------------------------------------------

    #[test]
    fn parse_generic_expression_handles_integers() {
        let result = parse_generic_expression("123").unwrap();
        assert_eq!(result.original_source, "123");
        assert!(matches!(result.kind, ExpressionKind::IntLiteral(123)));
    }

    #[test]
    fn parse_generic_expression_handles_negative_integers() {
        let result = parse_generic_expression("-456").unwrap();
        // `parse::<i64>()` accepts a leading minus sign, so this is a
        // literal rather than a unary negation.
        assert!(matches!(result.kind, ExpressionKind::IntLiteral(-456)));
    }

    #[test]
    fn parse_generic_expression_handles_identifiers() {
        let result = parse_generic_expression("num_threads").unwrap();
        match result.kind {
            ExpressionKind::Identifier(name) => assert_eq!(name, "num_threads"),
            other => panic!("Should be identifier, got {other:?}"),
        }
    }

    #[test]
    fn parse_generic_expression_handles_complex() {
        let result = parse_generic_expression("a + b").unwrap();
        match result.kind {
            ExpressionKind::Complex(s) => assert_eq!(s, "a + b"),
            other => panic!("Should be complex, got {other:?}"),
        }
    }

    // ------------------------------------------------------------------------
    // Helper function tests
    // ------------------------------------------------------------------------

    #[test]
    fn is_simple_identifier_accepts_valid_identifiers() {
        assert!(is_simple_identifier("x"));
        assert!(is_simple_identifier("my_var"));
        assert!(is_simple_identifier("_private"));
        assert!(is_simple_identifier("var123"));
        assert!(is_simple_identifier("CamelCase"));
    }

    #[test]
    fn is_simple_identifier_rejects_invalid() {
        assert!(!is_simple_identifier(""));
        assert!(!is_simple_identifier("123var")); // starts with digit
        assert!(!is_simple_identifier("my-var")); // contains hyphen
        assert!(!is_simple_identifier("my var")); // contains space
        assert!(!is_simple_identifier("my+var")); // contains operator
    }

    // ------------------------------------------------------------------------
    // Binary and Unary Operator tests
    // ------------------------------------------------------------------------

    #[test]
    fn binary_operator_has_correct_discriminants() {
        assert_eq!(BinaryOperator::Add as u32, 0);
        assert_eq!(BinaryOperator::Eq as u32, 10);
        assert_eq!(BinaryOperator::And as u32, 20);
        assert_eq!(BinaryOperator::BitwiseAnd as u32, 30);
    }

    #[test]
    fn unary_operator_has_correct_discriminants() {
        assert_eq!(UnaryOperator::Negate as u32, 0);
        assert_eq!(UnaryOperator::LogicalNot as u32, 1);
        assert_eq!(UnaryOperator::AddressOf as u32, 4);
    }
}
717}