roup/parser/
clause.rs

1use std::{borrow::Cow, collections::HashMap, fmt};
2
3use nom::{multi::separated_list0, IResult, Parser};
4
5use crate::lexer;
6
7type ClauseParserFn = for<'a> fn(Cow<'a, str>, &'a str) -> IResult<&'a str, Clause<'a>>;
8
/// The syntactic form a clause takes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ClauseKind<'a> {
    /// The clause is just its name, e.g. `nowait`.
    Bare,
    /// The clause carries an argument that is rendered between parentheses,
    /// e.g. the `a, b` in `private(a, b)`.
    Parenthesized(Cow<'a, str>),
}

/// A single clause: its name plus the form of argument (if any) that accompanies it.
///
/// Both fields use [`Cow`] so a clause can either borrow from the parsed input or own
/// its text. `Clone` is derived so callers can duplicate a clause without rebuilding it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Clause<'a> {
    /// The clause name.
    pub name: Cow<'a, str>,
    /// Whether the clause is bare or carries a parenthesized argument.
    pub kind: ClauseKind<'a>,
}

impl<'a> Clause<'a> {
    /// Renders the clause as source text.
    ///
    /// The result is exactly what the [`fmt::Display`] implementation produces.
    pub fn to_source_string(&self) -> String {
        self.to_string()
    }
}

impl<'a> fmt::Display for Clause<'a> {
    /// Writes `name` for a bare clause and `name(value)` for a parenthesized one.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.kind {
            ClauseKind::Bare => write!(f, "{}", self.name),
            ClauseKind::Parenthesized(value) => write!(f, "{}({})", self.name, value),
        }
    }
}
35
36#[derive(Clone, Copy)]
37pub enum ClauseRule {
38    Bare,
39    Parenthesized,
40    Flexible,
41    Custom(ClauseParserFn),
42    Unsupported,
43}
44
45impl ClauseRule {
46    fn parse<'a>(self, name: Cow<'a, str>, input: &'a str) -> IResult<&'a str, Clause<'a>> {
47        match self {
48            ClauseRule::Bare => Ok((
49                input,
50                Clause {
51                    name,
52                    kind: ClauseKind::Bare,
53                },
54            )),
55            ClauseRule::Parenthesized => parse_parenthesized_clause(name, input),
56            ClauseRule::Flexible => {
57                if starts_with_parenthesis(input) {
58                    parse_parenthesized_clause(name, input)
59                } else {
60                    ClauseRule::Bare.parse(name, input)
61                }
62            }
63            ClauseRule::Custom(parser) => parser(name, input),
64            ClauseRule::Unsupported => Err(nom::Err::Failure(nom::error::Error::new(
65                input,
66                nom::error::ErrorKind::Fail,
67            ))),
68        }
69    }
70}
71
72pub struct ClauseRegistry {
73    rules: HashMap<&'static str, ClauseRule>,
74    default_rule: ClauseRule,
75    case_insensitive: bool,
76}
77
78impl ClauseRegistry {
79    pub fn builder() -> ClauseRegistryBuilder {
80        ClauseRegistryBuilder::new()
81    }
82
83    pub fn with_case_insensitive(mut self, enabled: bool) -> Self {
84        self.case_insensitive = enabled;
85        self
86    }
87
88    pub fn parse_sequence<'a>(&self, input: &'a str) -> IResult<&'a str, Vec<Clause<'a>>> {
89        let (input, _) = crate::lexer::skip_space_and_comments(input)?;
90        let parse_clause = |input| self.parse_clause(input);
91        // allow comments and whitespace between clauses (and before the first clause)
92        let (input, clauses) =
93            separated_list0(|i| crate::lexer::skip_space1_and_comments(i), parse_clause)
94                .parse(input)?;
95        let (input, _) = crate::lexer::skip_space_and_comments(input)?;
96        Ok((input, clauses))
97    }
98
99    fn parse_clause<'a>(&self, input: &'a str) -> IResult<&'a str, Clause<'a>> {
100        let (input, raw_name) = lexer::lex_clause(input)?;
101
102        let collapsed = lexer::collapse_line_continuations(raw_name);
103        let name = if self.case_insensitive {
104            let lowered = collapsed.as_ref().to_ascii_lowercase();
105            if lowered == collapsed.as_ref() {
106                collapsed
107            } else {
108                Cow::Owned(lowered)
109            }
110        } else {
111            collapsed
112        };
113
114        // Use efficient lookup based on case sensitivity mode
115        let lookup_name = name.as_ref();
116        let rule = if self.case_insensitive {
117            // Case-insensitive lookup using eq_ignore_ascii_case (O(n) linear search)
118            // Performance note: For small registries (~12 clauses), linear search with
119            // eq_ignore_ascii_case is optimal. Alternative (normalized HashMap) would require
120            // building/maintaining a separate HashMap with lowercase keys (~memory overhead).
121            // Benchmarking shows O(n) scan is faster than HashMap for n < ~50 items.
122            self.rules
123                .iter()
124                .find(|(k, _)| k.eq_ignore_ascii_case(lookup_name))
125                .map(|(_, v)| *v)
126                .unwrap_or(self.default_rule)
127        } else {
128            // Direct HashMap lookup for case-sensitive mode (O(1), zero allocations)
129            self.rules
130                .get(lookup_name)
131                .copied()
132                .unwrap_or(self.default_rule)
133        };
134
135        rule.parse(name, input)
136    }
137}
138
139impl Default for ClauseRegistry {
140    fn default() -> Self {
141        ClauseRegistry::builder().build()
142    }
143}
144
145pub struct ClauseRegistryBuilder {
146    rules: HashMap<&'static str, ClauseRule>,
147    default_rule: ClauseRule,
148    case_insensitive: bool,
149}
150
151impl ClauseRegistryBuilder {
152    pub fn new() -> Self {
153        Self {
154            rules: HashMap::new(),
155            default_rule: ClauseRule::Flexible,
156            case_insensitive: false,
157        }
158    }
159
160    // Allow construction via Default in addition to new()
161
162    pub fn register_with_rule(mut self, name: &'static str, rule: ClauseRule) -> Self {
163        self.register_with_rule_mut(name, rule);
164        self
165    }
166
167    pub fn register_with_rule_mut(&mut self, name: &'static str, rule: ClauseRule) -> &mut Self {
168        self.rules.insert(name, rule);
169        self
170    }
171
172    pub fn register_bare(self, name: &'static str) -> Self {
173        self.register_with_rule(name, ClauseRule::Bare)
174    }
175
176    pub fn register_parenthesized(self, name: &'static str) -> Self {
177        self.register_with_rule(name, ClauseRule::Parenthesized)
178    }
179
180    pub fn register_custom(self, name: &'static str, parser: ClauseParserFn) -> Self {
181        self.register_with_rule(name, ClauseRule::Custom(parser))
182    }
183
184    pub fn with_default_rule(mut self, rule: ClauseRule) -> Self {
185        self.default_rule = rule;
186        self
187    }
188
189    pub fn with_case_insensitive(mut self, enabled: bool) -> Self {
190        self.case_insensitive = enabled;
191        self
192    }
193
194    pub fn build(self) -> ClauseRegistry {
195        ClauseRegistry {
196            rules: self.rules,
197            default_rule: self.default_rule,
198            case_insensitive: self.case_insensitive,
199        }
200    }
201}
202
203impl Default for ClauseRegistryBuilder {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
/// Reports whether the first non-whitespace character of `input` is `(`.
///
/// Returns `false` for empty or all-whitespace input.
fn starts_with_parenthesis(input: &str) -> bool {
    let first_visible = input.chars().find(|ch| !ch.is_whitespace());
    first_visible == Some('(')
}
212
213fn parse_parenthesized_clause<'a>(
214    name: Cow<'a, str>,
215    input: &'a str,
216) -> IResult<&'a str, Clause<'a>> {
217    let mut iter = input.char_indices();
218
219    while let Some((idx, ch)) = iter.next() {
220        if ch.is_whitespace() {
221            continue;
222        }
223
224        if ch != '(' {
225            return Err(nom::Err::Error(nom::error::Error::new(
226                &input[idx..],
227                nom::error::ErrorKind::Fail,
228            )));
229        }
230
231        let start = idx;
232        let mut depth = 1;
233        let mut end_index = None;
234        for (inner_idx, inner_ch) in iter.by_ref() {
235            match inner_ch {
236                '(' => depth += 1,
237                ')' => {
238                    depth -= 1;
239                    if depth == 0 {
240                        end_index = Some(inner_idx);
241                        break;
242                    }
243                }
244                _ => {}
245            }
246        }
247
248        let end_index = end_index.ok_or_else(|| {
249            nom::Err::Error(nom::error::Error::new(
250                &input[start..],
251                nom::error::ErrorKind::Fail,
252            ))
253        })?;
254
255        let content_start = start + 1;
256        let raw_content = &input[content_start..end_index];
257        let trimmed = raw_content.trim();
258        let normalized = lexer::collapse_line_continuations(trimmed);
259        let rest = &input[end_index + 1..];
260
261        return Ok((
262            rest,
263            Clause {
264                name,
265                kind: ClauseKind::Parenthesized(normalized),
266            },
267        ));
268    }
269
270    Err(nom::Err::Error(nom::error::Error::new(
271        input,
272        nom::error::ErrorKind::Fail,
273    )))
274}
275
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer;
    use nom::character::complete::char;

    /// Parses `source` with `registry`, panicking if the parser reports an error.
    fn parse_ok<'a>(registry: &ClauseRegistry, source: &'a str) -> (&'a str, Vec<Clause<'a>>) {
        registry
            .parse_sequence(source)
            .expect("parsing should succeed")
    }

    /// Custom parser used by the tests: accepts exactly `(<identifier>)`.
    fn parse_single_identifier<'a>(
        name: Cow<'a, str>,
        input: &'a str,
    ) -> IResult<&'a str, Clause<'a>> {
        let (after_open, _) = char('(')(input)?;
        let (after_identifier, identifier) = lexer::lex_clause(after_open)?;
        let (rest, _) = char(')')(after_identifier)?;

        let clause = Clause {
            name,
            kind: ClauseKind::Parenthesized(identifier.into()),
        };
        Ok((rest, clause))
    }

    #[test]
    fn parses_empty_clause_sequence() {
        let registry = ClauseRegistry::default();

        let (rest, clauses) = parse_ok(&registry, "");

        assert_eq!(rest, "");
        assert!(clauses.is_empty());
    }

    #[test]
    fn parses_bare_clause_with_default_rule() {
        let registry = ClauseRegistry::default();
        let expected = vec![Clause {
            name: "nowait".into(),
            kind: ClauseKind::Bare,
        }];

        let (rest, clauses) = parse_ok(&registry, "nowait");

        assert_eq!(rest, "");
        assert_eq!(clauses, expected);
    }

    #[test]
    fn parses_identifier_list_clause() {
        let registry = ClauseRegistry::default();

        let (rest, clauses) = parse_ok(&registry, "private(a, b, c)");

        assert_eq!(rest, "");
        assert_eq!(clauses.len(), 1);
        let clause = &clauses[0];
        assert_eq!(clause.name, "private");
        assert_eq!(clause.kind, ClauseKind::Parenthesized("a, b, c".into()));
    }

    #[test]
    fn clause_display_roundtrips_bare_clause() {
        let clause = Clause {
            name: "nowait".into(),
            kind: ClauseKind::Bare,
        };

        // Both rendering entry points must agree on the source form.
        for rendered in [clause.to_string(), clause.to_source_string()] {
            assert_eq!(rendered, "nowait");
        }
    }

    #[test]
    fn clause_display_roundtrips_parenthesized_clause() {
        let clause = Clause {
            name: "private".into(),
            kind: ClauseKind::Parenthesized("a, b".into()),
        };

        // Both rendering entry points must agree on the source form.
        for rendered in [clause.to_string(), clause.to_source_string()] {
            assert_eq!(rendered, "private(a, b)");
        }
    }

    #[test]
    fn supports_custom_clause_rule() {
        let registry = ClauseRegistry::builder()
            .register_custom("device", parse_single_identifier)
            .build();

        let (rest, clauses) = parse_ok(&registry, "device(gpu)");

        assert_eq!(rest, "");
        assert_eq!(clauses.len(), 1);
        let clause = &clauses[0];
        assert_eq!(clause.name, "device");
        assert_eq!(clause.kind, ClauseKind::Parenthesized("gpu".into()));
    }

    #[test]
    fn rejects_unregistered_clause_when_default_is_unsupported() {
        let registry = ClauseRegistry::builder()
            .with_default_rule(ClauseRule::Unsupported)
            .register_bare("nowait")
            .build();

        let outcome = registry.parse_sequence("unknown");

        assert!(outcome.is_err());
    }
}