roup/ir/
expression.rs

1//! Expression representation and optional parsing
2//!
3//! This module provides flexible expression handling:
4//! - **Default**: Attempt to parse expressions into structured AST
5//! - **Fallback**: Keep expressions as raw strings when parsing fails
6//! - **Configurable**: Can disable parsing entirely via ParserConfig
7//!
8//! ## Learning Objectives
9//!
10//! - **Enums for alternatives**: Expression can be Parsed OR Unparsed
11//! - **Recursive structures**: ExpressionAst contains nested expressions
12//! - **Configuration patterns**: ParserConfig controls behavior
13//! - **Graceful degradation**: Complex expressions fall back to strings
14//! - **Box for indirection**: Breaking recursive type cycles
15//!
16//! ## Design Philosophy
17//!
18//! The parser supports C, C++, and Fortran - languages with very different
19//! expression syntax. Rather than trying to perfectly parse all expressions,
20//! we take a pragmatic approach:
21//!
22//! 1. Parse common simple patterns (literals, identifiers, binary ops)
23//! 2. Fall back to string representation for complex expressions
24//! 3. Always preserve the original source text
25//! 4. Let the consuming compiler handle language-specific parsing
26//!
27//! This makes the IR **useful immediately** while allowing incremental
28//! improvement of expression parsing over time.
29
30use std::fmt;
31
32use super::Language;
33
34// ============================================================================
35// Parser Configuration
36// ============================================================================
37
38/// Configuration for IR generation and expression parsing
39///
40/// This controls how the parser converts syntax to IR, particularly
41/// how it handles expressions.
42///
43/// ## Learning: Configuration Pattern
44///
45/// Rather than using global state or command-line flags, we pass
46/// configuration explicitly. This makes the code:
47/// - **Testable**: Easy to test with different configs
48/// - **Composable**: Multiple parsers with different configs
49/// - **Thread-safe**: No global mutable state
50///
51/// ## Example
52///
53/// ```
54/// use roup::ir::{ParserConfig, Language};
55///
56/// // Default: parse expressions
57/// let default_config = ParserConfig::default();
58/// assert!(default_config.parse_expressions);
59///
60/// // Custom: disable expression parsing
61/// let string_only = ParserConfig::string_only(Language::C);
62/// ```
63#[derive(Debug, Clone, Copy, PartialEq, Eq)]
64pub struct ParserConfig {
65    /// Whether to attempt parsing expressions into structured form
66    ///
67    /// - `true` (default): Parse expressions, fall back to string on failure
68    /// - `false`: Keep all expressions as raw strings
69    pub parse_expressions: bool,
70
71    /// Source language (affects expression parsing rules)
72    ///
73    /// Different languages have different expression syntax:
74    /// - C/C++: `arr[i]`, `*ptr`, `x->y`
75    /// - Fortran: `arr(i)`, different operators
76    language: Language,
77
78    /// Whether to enable language-aware semantic parsing for clause items.
79    language_semantics: bool,
80}
81
82impl ParserConfig {
83    /// Create a new configuration
84    pub const fn new(parse_expressions: bool, language: Language) -> Self {
85        Self {
86            parse_expressions,
87            language,
88            language_semantics: true,
89        }
90    }
91
92    /// Create config that keeps all expressions as strings
93    pub const fn string_only(language: Language) -> Self {
94        Self::new(false, language)
95    }
96
97    /// Create config that parses expressions
98    pub const fn with_parsing(language: Language) -> Self {
99        Self::new(true, language)
100    }
101
102    /// Override the language for this configuration.
103    pub const fn with_language(mut self, language: Language) -> Self {
104        self.language = language;
105        self
106    }
107
108    /// Enable or disable language semantics for clause parsing.
109    pub const fn with_language_semantics(mut self, enabled: bool) -> Self {
110        self.language_semantics = enabled;
111        self
112    }
113
114    /// Return a copy of this configuration using a specific language.
115    pub const fn for_language(&self, language: Language) -> Self {
116        Self {
117            parse_expressions: self.parse_expressions,
118            language,
119            language_semantics: self.language_semantics,
120        }
121    }
122
123    /// Get the configured language.
124    pub const fn language(&self) -> Language {
125        self.language
126    }
127
128    /// Whether language semantics are enabled.
129    pub const fn language_semantics_enabled(&self) -> bool {
130        self.language_semantics
131    }
132}
133
134impl Default for ParserConfig {
135    /// Default: parse expressions, unknown language
136    fn default() -> Self {
137        Self {
138            parse_expressions: true,
139            language: Language::Unknown,
140            language_semantics: true,
141        }
142    }
143}
144
145// ============================================================================
146// Expression Types
147// ============================================================================
148
149/// An expression that may be parsed or unparsed
150///
151/// This is the core type for representing expressions in the IR.
152/// It gracefully handles both structured and unstructured forms.
153///
154/// ## Learning: Enums for Polymorphism
155///
156/// Instead of inheritance (like in C++), Rust uses enums to represent
157/// "one of several types". This is more explicit and type-safe.
158///
159/// ## Learning: Box for Recursion
160///
161/// The `Parsed` variant contains `Box<ExpressionAst>` instead of
162/// `ExpressionAst` directly. Why? Because `ExpressionAst` itself
163/// contains `Expression` values (recursion!).
164///
165/// Without `Box`, the type would have infinite size. `Box` provides
166/// indirection through a heap pointer, breaking the cycle.
167///
168/// ## Example
169///
170/// ```
171/// use roup::ir::{Expression, ParserConfig};
172///
173/// let config = ParserConfig::default();
174///
175/// // Simple expression gets parsed
176/// let simple = Expression::new("42", &config);
177/// assert!(simple.is_parsed());
178///
179/// // Complex expression falls back to string
180/// let complex = Expression::new("sizeof(struct foo)", &config);
181/// // May or may not be parsed depending on parser capability
182///
183/// // With parsing disabled, always unparsed
184/// let config_no_parse = ParserConfig::string_only(roup::ir::Language::C);
185/// let expr = Expression::new("N * 2", &config_no_parse);
186/// assert!(!expr.is_parsed());
187/// assert_eq!(expr.as_str(), "N * 2");
188/// ```
189#[derive(Debug, Clone, PartialEq)]
190pub enum Expression {
191    /// Expression was successfully parsed into structured form
192    ///
193    /// The compiler can analyze the AST structure for optimization,
194    /// validation, or transformation.
195    Parsed(Box<ExpressionAst>),
196
197    /// Expression kept as raw string
198    ///
199    /// This happens when:
200    /// - Expression parsing is disabled
201    /// - Expression is too complex for the parser
202    /// - Parser doesn't support this language construct yet
203    ///
204    /// The compiler must parse this string according to the source language.
205    Unparsed(String),
206}
207
208impl Expression {
209    /// Create a new expression, attempting to parse if enabled
210    ///
211    /// ## Example
212    ///
213    /// ```
214    /// use roup::ir::{Expression, ParserConfig, Language};
215    ///
216    /// let config = ParserConfig::default();
217    /// let expr = Expression::new("100", &config);
218    /// assert_eq!(expr.as_str(), "100");
219    /// ```
220    pub fn new(raw: impl Into<String>, config: &ParserConfig) -> Self {
221        let raw = raw.into();
222        let trimmed = raw.trim().to_string();
223
224        // If parsing disabled, return unparsed
225        if !config.parse_expressions {
226            return Expression::Unparsed(trimmed);
227        }
228
229        // Try to parse based on language
230        match parse_expression(&trimmed, config.language()) {
231            Ok(ast) => Expression::Parsed(Box::new(ast)),
232            Err(_) => Expression::Unparsed(trimmed),
233        }
234    }
235
236    /// Create an unparsed expression directly
237    ///
238    /// Useful when you know parsing will fail or you want to bypass it.
239    pub fn unparsed(raw: impl Into<String>) -> Self {
240        Expression::Unparsed(raw.into())
241    }
242
243    /// Get the raw string representation
244    ///
245    /// This always works, whether the expression is parsed or not.
246    /// The original source is always preserved.
247    pub fn as_str(&self) -> &str {
248        match self {
249            Expression::Parsed(ast) => &ast.original_source,
250            Expression::Unparsed(s) => s,
251        }
252    }
253
254    /// Check if expression was successfully parsed
255    pub const fn is_parsed(&self) -> bool {
256        matches!(self, Expression::Parsed(_))
257    }
258
259    /// Get the parsed AST if available
260    pub fn as_ast(&self) -> Option<&ExpressionAst> {
261        match self {
262            Expression::Parsed(ast) => Some(ast),
263            Expression::Unparsed(_) => None,
264        }
265    }
266}
267
268impl fmt::Display for Expression {
269    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
270        write!(f, "{}", self.as_str())
271    }
272}
273
274// ============================================================================
275// Expression AST (Structured Representation)
276// ============================================================================
277
278/// Parsed expression abstract syntax tree
279///
280/// This represents common expression patterns found in OpenMP directives.
281/// It's **not** a complete C/C++/Fortran parser, just enough to handle
282/// typical OpenMP expressions.
283///
284/// ## Learning: Recursive Data Structures
285///
286/// Notice that `ExpressionKind` contains `Box<ExpressionAst>` in several
287/// variants. This allows representing nested expressions like:
288/// - `(a + b) * c` - BinaryOp containing another BinaryOp
289/// - `arr[i][j]` - ArrayAccess containing another ArrayAccess
290///
291/// ## Example
292///
293/// ```
294/// use roup::ir::{Expression, ParserConfig};
295///
296/// let config = ParserConfig::default();
297/// let expr = Expression::new("42", &config);
298///
299/// if let Some(ast) = expr.as_ast() {
300///     // Can inspect the AST structure
301///     println!("Original: {}", ast.original_source);
302/// }
303/// ```
304#[derive(Debug, Clone, PartialEq)]
305pub struct ExpressionAst {
306    /// Original source text (always preserved)
307    pub original_source: String,
308
309    /// Parsed structure (best-effort)
310    pub kind: ExpressionKind,
311}
312
313/// Common expression patterns in OpenMP directives
314///
315/// ## Learning: Large Enums with Data
316///
317/// This enum demonstrates Rust's powerful enum system. Each variant
318/// can carry different data:
319/// - `IntLiteral(i64)` - carries an integer
320/// - `Identifier(String)` - carries an owned string
321/// - `BinaryOp { ... }` - carries multiple fields
322///
323/// This is much more powerful than C enums, which can only be simple tags.
324#[derive(Debug, Clone, PartialEq)]
325pub enum ExpressionKind {
326    /// Integer literal: `42`, `0x10`, `0b1010`
327    IntLiteral(i64),
328
329    /// Identifier: `N`, `num_threads`, `my_var`
330    Identifier(String),
331
332    /// Binary operation: `a + b`, `N * 2`, `i < 10`
333    BinaryOp {
334        left: Box<ExpressionAst>,
335        op: BinaryOperator,
336        right: Box<ExpressionAst>,
337    },
338
339    /// Unary operation: `-x`, `!flag`, `*ptr`
340    UnaryOp {
341        op: UnaryOperator,
342        operand: Box<ExpressionAst>,
343    },
344
345    /// Function call: `foo(a, b)`, `omp_get_num_threads()`
346    Call {
347        function: String,
348        args: Vec<ExpressionAst>,
349    },
350
351    /// Array subscript: `arr[i]`, `matrix[i][j]`
352    ArrayAccess {
353        array: Box<ExpressionAst>,
354        indices: Vec<ExpressionAst>,
355    },
356
357    /// Ternary conditional: `cond ? a : b`
358    Conditional {
359        condition: Box<ExpressionAst>,
360        then_expr: Box<ExpressionAst>,
361        else_expr: Box<ExpressionAst>,
362    },
363
364    /// Parenthesized: `(expr)`
365    Parenthesized(Box<ExpressionAst>),
366
367    /// Too complex to parse, kept as string
368    ///
369    /// This is our escape hatch for expressions that are valid
370    /// but not yet supported by the parser.
371    Complex(String),
372}
373
/// Operators that combine two operands
///
/// ## Learning: repr(C) for C Interop
///
/// `#[repr(C)]` asks the compiler to lay this enum out the way a C compiler
/// would lay out the equivalent `enum`, so values can be exchanged with C
/// code. Every variant is given an explicit discriminant; each operator
/// category starts at its own multiple of ten.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(C)]
pub enum BinaryOperator {
    // Arithmetic (0..)
    Add = 0,
    Sub = 1,
    Mul = 2,
    Div = 3,
    Mod = 4,

    // Comparison (10..)
    Eq = 10,
    Ne = 11,
    Lt = 12,
    Le = 13,
    Gt = 14,
    Ge = 15,

    // Logical (20..)
    And = 20,
    Or = 21,

    // Bitwise (30..)
    BitwiseAnd = 30,
    BitwiseOr = 31,
    BitwiseXor = 32,
    ShiftLeft = 33,
    ShiftRight = 34,
}
409
/// Operators that apply to a single operand
///
/// Like [`BinaryOperator`], the enum is `#[repr(C)]` and every variant has
/// an explicit discriminant.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(C)]
pub enum UnaryOperator {
    /// Arithmetic negation: `-x`
    Negate = 0,
    /// Logical negation: `!x`
    LogicalNot = 1,
    /// Bitwise complement: `~x`
    BitwiseNot = 2,
    /// Pointer dereference: `*ptr` (C/C++)
    Deref = 3,
    /// Address-of: `&var` (C/C++)
    AddressOf = 4,
}
420
421// ============================================================================
422// Expression Parser (Isolated, Configurable)
423// ============================================================================
424
/// Error type for expression parsing
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// printed directly and propagated with `?` into `Box<dyn Error>` or any
/// error type that wraps standard errors.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of why parsing failed
    pub message: String,
}

impl fmt::Display for ParseError {
    /// Writes the error message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for ParseError {}
430
431/// Parse an expression string into an AST
432///
433/// This is **isolated** and can be disabled via config.
434/// Returns `Err` if expression is too complex or language-specific.
435///
436/// ## Learning: Error Handling with Result
437///
438/// Rust doesn't have exceptions. Instead, functions that can fail
439/// return `Result<T, E>`:
440/// - `Ok(value)` - success
441/// - `Err(error)` - failure
442///
443/// The caller must handle both cases (checked at compile time!).
444fn parse_expression(input: &str, language: Language) -> Result<ExpressionAst, ParseError> {
445    match language {
446        Language::C | Language::Cpp => parse_c_expression(input),
447        Language::Fortran => parse_fortran_expression(input),
448        Language::Unknown => parse_generic_expression(input),
449    }
450}
451
452/// Parse C/C++ expression
453///
454/// Currently falls back to generic parser. In the future, this could
455/// handle C/C++-specific constructs like `->`, `sizeof`, etc.
456fn parse_c_expression(input: &str) -> Result<ExpressionAst, ParseError> {
457    parse_generic_expression(input)
458}
459
460/// Parse Fortran expression
461///
462/// Currently falls back to generic parser. In the future, this could
463/// handle Fortran-specific constructs.
464fn parse_fortran_expression(input: &str) -> Result<ExpressionAst, ParseError> {
465    parse_generic_expression(input)
466}
467
468/// Parse simple, language-agnostic expressions
469///
470/// This handles the most common patterns:
471/// - Integer literals: `42`
472/// - Identifiers: `N`, `my_var`
473/// - Everything else: marked as `Complex`
474///
475/// This is intentionally simple. Complex parsing can be added later
476/// without changing the IR structure.
477fn parse_generic_expression(input: &str) -> Result<ExpressionAst, ParseError> {
478    let trimmed = input.trim();
479
480    // Try to parse as integer literal
481    if let Ok(value) = trimmed.parse::<i64>() {
482        return Ok(ExpressionAst {
483            original_source: input.to_string(),
484            kind: ExpressionKind::IntLiteral(value),
485        });
486    }
487
488    // Try to parse as identifier
489    if is_simple_identifier(trimmed) {
490        return Ok(ExpressionAst {
491            original_source: input.to_string(),
492            kind: ExpressionKind::Identifier(trimmed.to_string()),
493        });
494    }
495
496    // For everything else, mark as complex
497    // The consuming compiler will parse it
498    Ok(ExpressionAst {
499        original_source: input.to_string(),
500        kind: ExpressionKind::Complex(trimmed.to_string()),
501    })
502}
503
/// Decide whether `s` is a simple identifier
///
/// A simple identifier:
/// - is not empty
/// - begins with an alphabetic character or `_`
/// - continues with alphanumeric characters or `_` only
///
/// Character classes follow `char::is_alphabetic` and
/// `char::is_alphanumeric`, so non-ASCII letters and digits are accepted.
fn is_simple_identifier(s: &str) -> bool {
    let mut rest = s.chars();

    match rest.next() {
        // Valid first character: the remainder decides.
        Some(head) if head == '_' || head.is_alphabetic() => {
            rest.all(|ch| ch == '_' || ch.is_alphanumeric())
        }
        // Empty string, or an invalid first character.
        _ => false,
    }
}
525
526// ============================================================================
527// Tests
528// ============================================================================
529
#[cfg(test)]
mod tests {
    use super::*;

    /// Borrow the AST of an expression that must be parsed.
    ///
    /// Panics when the expression is unparsed, so a test using it cannot
    /// pass without checking the AST.
    fn parsed_ast(expr: &Expression) -> &ExpressionAst {
        expr.as_ast().expect("expression should have been parsed")
    }

    // ------------------------------------------------------------------------
    // ParserConfig tests
    // ------------------------------------------------------------------------

    #[test]
    fn parser_config_default_enables_parsing() {
        let config = ParserConfig::default();
        assert!(config.parse_expressions);
        assert_eq!(config.language(), Language::Unknown);
        assert!(config.language_semantics_enabled());
    }

    #[test]
    fn parser_config_string_only_disables_parsing() {
        let config = ParserConfig::string_only(Language::C);
        assert!(!config.parse_expressions);
        assert_eq!(config.language(), Language::C);
    }

    #[test]
    fn parser_config_with_parsing_enables_parsing() {
        let config = ParserConfig::with_parsing(Language::Fortran);
        assert!(config.parse_expressions);
        assert_eq!(config.language(), Language::Fortran);
    }

    #[test]
    fn parser_config_with_language_changes_only_language() {
        let config = ParserConfig::string_only(Language::C).with_language(Language::Cpp);
        assert_eq!(config.language(), Language::Cpp);
        assert!(!config.parse_expressions);
        assert!(config.language_semantics_enabled());
    }

    #[test]
    fn parser_config_with_language_semantics_toggles_flag() {
        let config = ParserConfig::default().with_language_semantics(false);
        assert!(!config.language_semantics_enabled());
        assert!(config.parse_expressions);
        assert_eq!(config.language(), Language::Unknown);

        let restored = config.with_language_semantics(true);
        assert!(restored.language_semantics_enabled());
    }

    #[test]
    fn parser_config_for_language_keeps_other_settings() {
        let base = ParserConfig::string_only(Language::C).with_language_semantics(false);
        let derived = base.for_language(Language::Fortran);

        assert_eq!(derived.language(), Language::Fortran);
        assert!(!derived.parse_expressions);
        assert!(!derived.language_semantics_enabled());
        // The original configuration is untouched.
        assert_eq!(base.language(), Language::C);
    }

    // ------------------------------------------------------------------------
    // Expression tests
    // ------------------------------------------------------------------------

    #[test]
    fn expression_new_parses_integer_literal() {
        let config = ParserConfig::default();
        let expr = Expression::new("42", &config);

        assert!(expr.is_parsed());
        assert_eq!(expr.as_str(), "42");
        assert_eq!(parsed_ast(&expr).kind, ExpressionKind::IntLiteral(42));
    }

    #[test]
    fn expression_new_parses_identifier() {
        let config = ParserConfig::default();
        let expr = Expression::new("my_var", &config);

        assert!(expr.is_parsed());
        assert_eq!(expr.as_str(), "my_var");
        assert_eq!(
            parsed_ast(&expr).kind,
            ExpressionKind::Identifier("my_var".to_string())
        );
    }

    #[test]
    fn expression_new_handles_complex_as_complex() {
        let config = ParserConfig::default();
        let expr = Expression::new("a + b * c", &config);

        // Parsed, but only as the `Complex` escape hatch.
        assert!(expr.is_parsed());
        assert_eq!(
            parsed_ast(&expr).kind,
            ExpressionKind::Complex("a + b * c".to_string())
        );
    }

    #[test]
    fn expression_with_parsing_disabled_stays_unparsed() {
        let config = ParserConfig::string_only(Language::C);
        let expr = Expression::new("42", &config);

        assert!(!expr.is_parsed());
        assert_eq!(expr.as_str(), "42");
        assert!(expr.as_ast().is_none());
    }

    #[test]
    fn expression_unparsed_creates_unparsed() {
        let expr = Expression::unparsed("anything");

        assert!(!expr.is_parsed());
        assert_eq!(expr.as_str(), "anything");
        assert!(expr.as_ast().is_none());
    }

    #[test]
    fn expression_preserves_original_source() {
        let config = ParserConfig::default();
        let expr = Expression::new("  42  ", &config);

        // `Expression::new` trims surrounding whitespace before storing.
        assert_eq!(expr.as_str(), "42");
    }

    #[test]
    fn expression_display_shows_source() {
        let expr = Expression::unparsed("N * 2");
        assert_eq!(format!("{expr}"), "N * 2");
    }

    #[test]
    fn expression_display_shows_source_when_parsed() {
        let expr = Expression::new("N", &ParserConfig::default());

        assert!(expr.is_parsed());
        assert_eq!(format!("{expr}"), "N");
    }

    // ------------------------------------------------------------------------
    // ExpressionAst tests
    // ------------------------------------------------------------------------

    #[test]
    fn parse_generic_expression_handles_integers() {
        let result = parse_generic_expression("123").unwrap();
        assert_eq!(result.original_source, "123");
        assert_eq!(result.kind, ExpressionKind::IntLiteral(123));
    }

    #[test]
    fn parse_generic_expression_handles_negative_integers() {
        let result = parse_generic_expression("-456").unwrap();
        // `parse::<i64>()` accepts a leading minus sign, so this is a
        // literal rather than a unary negation.
        assert_eq!(result.kind, ExpressionKind::IntLiteral(-456));
    }

    #[test]
    fn parse_generic_expression_handles_identifiers() {
        let result = parse_generic_expression("num_threads").unwrap();
        assert_eq!(
            result.kind,
            ExpressionKind::Identifier("num_threads".to_string())
        );
    }

    #[test]
    fn parse_generic_expression_handles_complex() {
        let result = parse_generic_expression("a + b").unwrap();
        assert_eq!(result.kind, ExpressionKind::Complex("a + b".to_string()));
    }

    #[test]
    fn parse_generic_expression_keeps_input_as_original_source() {
        let result = parse_generic_expression("  N  ").unwrap();

        // Classification uses the trimmed text; the recorded source does not.
        assert_eq!(result.original_source, "  N  ");
        assert_eq!(result.kind, ExpressionKind::Identifier("N".to_string()));
    }

    // ------------------------------------------------------------------------
    // Helper function tests
    // ------------------------------------------------------------------------

    #[test]
    fn is_simple_identifier_accepts_valid_identifiers() {
        assert!(is_simple_identifier("x"));
        assert!(is_simple_identifier("my_var"));
        assert!(is_simple_identifier("_private"));
        assert!(is_simple_identifier("var123"));
        assert!(is_simple_identifier("CamelCase"));
    }

    #[test]
    fn is_simple_identifier_rejects_invalid() {
        assert!(!is_simple_identifier(""));
        assert!(!is_simple_identifier("123var")); // starts with digit
        assert!(!is_simple_identifier("my-var")); // contains hyphen
        assert!(!is_simple_identifier("my var")); // contains space
        assert!(!is_simple_identifier("my+var")); // contains operator
    }

    // ------------------------------------------------------------------------
    // Binary and Unary Operator tests
    // ------------------------------------------------------------------------

    #[test]
    fn binary_operator_has_correct_discriminants() {
        assert_eq!(BinaryOperator::Add as u32, 0);
        assert_eq!(BinaryOperator::Eq as u32, 10);
        assert_eq!(BinaryOperator::And as u32, 20);
        assert_eq!(BinaryOperator::BitwiseAnd as u32, 30);
    }

    #[test]
    fn unary_operator_has_correct_discriminants() {
        assert_eq!(UnaryOperator::Negate as u32, 0);
        assert_eq!(UnaryOperator::LogicalNot as u32, 1);
        assert_eq!(UnaryOperator::AddressOf as u32, 4);
    }
}