cqlsh_rs/
parser.rs

1//! Statement parser for cqlsh-rs.
2//!
3//! Handles multi-line input buffering, semicolon-terminated statement detection,
4//! comment stripping, string literal handling, and routing between CQL statements
5//! and built-in shell commands.
6//!
7//! Key design decisions (from SP4 and SP16 upstream fixes):
8//! - Context-aware tokenization: NO regex preprocessing for comments (PR #150)
9//! - Truly incremental parsing: O(n) total work via scan_offset tracking (PR #151)
10
/// Lexical context the scanner is in at a given byte of CQL input.
///
/// Statement terminators and comment openers are only recognised in
/// [`LexState::Normal`]; every other variant describes a region whose
/// contents are skipped until its closing delimiter is seen.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum LexState {
    /// Plain CQL text, outside any string, identifier or comment.
    #[default]
    Normal,
    /// Within a `'...'` string literal.
    SingleQuote,
    /// Within a `"..."` quoted identifier.
    DoubleQuote,
    /// Within a `$$...$$` string literal.
    DollarQuote,
    /// Within a `/* ... */` comment.
    BlockComment,
    /// Within a `-- ...` comment, which runs to the end of the line.
    LineComment,
}
28
29/// Incremental statement parser.
30///
31/// Tracks lexer state across `feed_line` calls so that each call only scans
32/// the newly appended bytes. Total work is O(n) over the lifetime of the parser,
33/// not O(n²). See PR #151 for why this matters.
34#[derive(Debug, Default)]
35pub struct StatementParser {
36    /// Accumulated input buffer.
37    buffer: String,
38    /// Byte offset in `buffer` where the next scan should resume.
39    scan_offset: usize,
40    /// Byte offset of the start of the current (in-progress) statement.
41    stmt_start: usize,
42    /// Current lexer state at `scan_offset`.
43    state: LexState,
44    /// Depth of nested block comments.
45    block_comment_depth: usize,
46    /// True when we are inside a `BEGIN BATCH … APPLY BATCH` block.
47    /// Semicolons inside a batch do not terminate the batch statement.
48    in_batch: bool,
49}
50
/// Outcome of handing one more line of input to the parser.
#[derive(Debug, Eq, PartialEq)]
#[must_use]
pub enum ParseResult {
    /// Nothing could be emitted yet; more input is needed.
    Incomplete,
    /// At least one finished statement was extracted, in input order.
    Complete(Vec<String>),
}
60
/// What kind of input a piece of text turned out to be.
#[derive(Debug, Eq, PartialEq)]
#[must_use]
pub enum InputKind {
    /// One of the shell's own commands (HELP, QUIT, DESCRIBE, ...).
    ShellCommand(String),
    /// CQL text that should be sent on to the driver.
    CqlStatement(String),
    /// Nothing but whitespace, or nothing at all.
    Empty,
}
72
/// Upper-case leading keywords of the built-in shell commands.
///
/// Input starting with one of these does not need a terminating semicolon.
const SHELL_COMMANDS: &[&str] = &[
    // Help and session exit.
    "HELP",
    "?",
    "QUIT",
    "EXIT",
    // Schema inspection.
    "DESCRIBE",
    "DESC",
    // Session settings.
    "CONSISTENCY",
    "SERIAL",
    "TRACING",
    "EXPAND",
    "PAGING",
    // Connection, scripting and output.
    "LOGIN",
    "SOURCE",
    "CAPTURE",
    "SHOW",
    "CLEAR",
    "CLS",
    "UNICODE",
    "DEBUG",
    "COPY",
    "USE",
];
97
98impl StatementParser {
99    /// Create a new empty parser.
100    #[must_use]
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Reset the parser, discarding any accumulated input.
106    pub fn reset(&mut self) {
107        self.buffer.clear();
108        self.scan_offset = 0;
109        self.stmt_start = 0;
110        self.state = LexState::Normal;
111        self.block_comment_depth = 0;
112        self.in_batch = false;
113    }
114
115    /// Returns true if the parser has no accumulated input.
116    #[must_use]
117    pub fn is_empty(&self) -> bool {
118        self.buffer.is_empty()
119    }
120
121    /// Returns the remaining unparsed content in the buffer.
122    #[must_use]
123    pub fn remaining(&self) -> &str {
124        &self.buffer[self.stmt_start..]
125    }
126
127    /// Feed a line of input and return any complete statements.
128    ///
129    /// This is the incremental entry point. Each call scans only the newly
130    /// appended bytes, preserving lexer state from the previous call.
131    /// Total work across all `feed_line` calls is O(n).
132    pub fn feed_line(&mut self, line: &str) -> ParseResult {
133        if !self.buffer.is_empty() {
134            self.buffer.push('\n');
135        }
136        self.buffer.push_str(line);
137
138        self.scan_for_statements()
139    }
140
141    /// Scan from `scan_offset` forward for statement terminators.
142    ///
143    /// Only scans newly appended bytes — does NOT re-scan from the start.
144    /// State (`self.state`, `self.block_comment_depth`) is preserved across calls.
145    fn scan_for_statements(&mut self) -> ParseResult {
146        let mut statements = Vec::new();
147
148        // We work on byte offsets using char_indices over the unscanned portion.
149        // But we need to handle multi-byte chars correctly, so iterate chars.
150        let buf = self.buffer.as_bytes();
151        let len = buf.len();
152        let mut i = self.scan_offset;
153
154        while i < len {
155            let (ch, char_len) = decode_char_at(&self.buffer, i);
156
157            match self.state {
158                LexState::Normal => {
159                    if ch == '\'' {
160                        self.state = LexState::SingleQuote;
161                        i += char_len;
162                    } else if ch == '"' {
163                        self.state = LexState::DoubleQuote;
164                        i += char_len;
165                    } else if ch == '$' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'$' {
166                        self.state = LexState::DollarQuote;
167                        i += 2;
168                    } else if ch == '-' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'-' {
169                        self.state = LexState::LineComment;
170                        i += 2;
171                    } else if ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'*' {
172                        self.state = LexState::BlockComment;
173                        self.block_comment_depth = 1;
174                        i += 2;
175                    } else if ch == ';' {
176                        // Statement terminator found in Normal state.
177                        let raw = &self.buffer[self.stmt_start..i];
178                        let stripped = strip_comments(raw);
179                        let trimmed = stripped.trim();
180
181                        if self.in_batch {
182                            // Inside BEGIN BATCH … APPLY BATCH: semicolons
183                            // between DML statements are part of the batch
184                            // syntax, not statement terminators.  Only emit
185                            // when APPLY BATCH has been reached.
186                            if ends_with_apply_batch(trimmed) {
187                                self.in_batch = false;
188                                if !trimmed.is_empty() {
189                                    statements.push(trimmed.to_string());
190                                }
191                                self.stmt_start = i + 1;
192                            }
193                            // Otherwise keep accumulating; do NOT advance stmt_start.
194                            i += 1;
195                        } else if starts_with_begin_batch(trimmed) {
196                            // Opening of a BATCH block: treat the ';' as
197                            // internal to the batch, not as a terminator.
198                            self.in_batch = true;
199                            // Do NOT advance stmt_start — keep accumulating
200                            // from the start of BEGIN BATCH.
201                            i += 1;
202                        } else {
203                            if !trimmed.is_empty() {
204                                statements.push(trimmed.to_string());
205                            }
206                            self.stmt_start = i + 1; // skip the ';'
207                            i += 1;
208                        }
209                    } else {
210                        i += char_len;
211                    }
212                }
213                LexState::SingleQuote => {
214                    if ch == '\'' {
215                        // Check for escaped quote ('')
216                        if i + 1 < len && self.buffer.as_bytes()[i + 1] == b'\'' {
217                            i += 2; // skip escaped quote
218                        } else {
219                            self.state = LexState::Normal;
220                            i += 1;
221                        }
222                    } else {
223                        i += char_len;
224                    }
225                }
226                LexState::DoubleQuote => {
227                    if ch == '"' {
228                        // Check for escaped quote ("")
229                        if i + 1 < len && self.buffer.as_bytes()[i + 1] == b'"' {
230                            i += 2;
231                        } else {
232                            self.state = LexState::Normal;
233                            i += 1;
234                        }
235                    } else {
236                        i += char_len;
237                    }
238                }
239                LexState::DollarQuote => {
240                    if ch == '$' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'$' {
241                        self.state = LexState::Normal;
242                        i += 2;
243                    } else {
244                        i += char_len;
245                    }
246                }
247                LexState::LineComment => {
248                    if ch == '\n' {
249                        self.state = LexState::Normal;
250                    }
251                    i += char_len;
252                }
253                LexState::BlockComment => {
254                    if ch == '*' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'/' {
255                        self.block_comment_depth -= 1;
256                        if self.block_comment_depth == 0 {
257                            self.state = LexState::Normal;
258                        }
259                        i += 2;
260                    } else if ch == '/' && i + 1 < len && self.buffer.as_bytes()[i + 1] == b'*' {
261                        self.block_comment_depth += 1;
262                        i += 2;
263                    } else {
264                        i += char_len;
265                    }
266                }
267            }
268        }
269
270        self.scan_offset = i;
271
272        // Always compact the buffer when stmt_start has advanced past consumed
273        // content (e.g., empty statements like `;;` that were skipped).
274        if self.stmt_start > 0 {
275            self.buffer = self.buffer[self.stmt_start..].to_string();
276            self.scan_offset -= self.stmt_start;
277            self.stmt_start = 0;
278        }
279
280        // If the remaining buffer is only whitespace and/or comments (no
281        // meaningful CQL tokens), clear it so the REPL returns to the primary
282        // prompt. This handles trailing line comments after semicolons
283        // (e.g., `SELECT 1; -- comment`) and bare `;;`.
284        if !self.buffer.is_empty() {
285            let stripped = strip_comments(&self.buffer);
286            if stripped.trim().is_empty() {
287                self.buffer.clear();
288                self.scan_offset = 0;
289                self.state = LexState::Normal;
290                self.block_comment_depth = 0;
291            }
292        }
293
294        if statements.is_empty() {
295            ParseResult::Incomplete
296        } else {
297            ParseResult::Complete(statements)
298        }
299    }
300}
301
/// Read the character that starts at byte offset `i` of `s`.
///
/// Returns the character together with the number of bytes it occupies in
/// UTF-8. When `i` equals `s.len()` there is no character to read and
/// `('\0', 1)` is returned instead.
///
/// `i` has to lie on a char boundary (slicing panics otherwise); the scanners
/// in this file uphold that by only ever advancing by the returned length or
/// past ASCII delimiters.
fn decode_char_at(s: &str, i: usize) -> (char, usize) {
    match s[i..].chars().next() {
        Some(found) => (found, found.len_utf8()),
        None => ('\0', 1),
    }
}
309
/// Tell whether `text` begins with the header of a CQL BATCH block.
///
/// Recognised headers are `BEGIN BATCH`, `BEGIN UNLOGGED BATCH` and
/// `BEGIN COUNTER BATCH`. Keywords are compared ASCII case-insensitively and
/// may be separated by any run of whitespace; whatever follows the header is
/// ignored.
fn starts_with_begin_batch(text: &str) -> bool {
    let mut tokens = text.split_whitespace();
    let is_keyword =
        |token: Option<&str>, keyword: &str| matches!(token, Some(t) if t.eq_ignore_ascii_case(keyword));

    if !is_keyword(tokens.next(), "BEGIN") {
        return false;
    }

    match tokens.next() {
        Some(second) if second.eq_ignore_ascii_case("BATCH") => true,
        Some(second)
            if second.eq_ignore_ascii_case("UNLOGGED")
                || second.eq_ignore_ascii_case("COUNTER") =>
        {
            is_keyword(tokens.next(), "BATCH")
        }
        _ => false,
    }
}
333
/// Tell whether the last two whitespace-separated words of `text` are
/// `APPLY` followed by `BATCH` (compared ASCII case-insensitively).
fn ends_with_apply_batch(text: &str) -> bool {
    // Walk the words from the back; only the final two matter.
    let mut from_end = text.split_whitespace().rev();
    let last = from_end.next();
    let second_last = from_end.next();

    match (second_last, last) {
        (Some(apply), Some(batch)) => {
            apply.eq_ignore_ascii_case("APPLY") && batch.eq_ignore_ascii_case("BATCH")
        }
        _ => false,
    }
}
343
344/// Strip comments from a CQL fragment (used on extracted statements).
345///
346/// This function uses context-aware scanning to avoid stripping comment-like
347/// sequences inside string literals (PR #150 fix). Handles nested block comments.
348fn strip_comments(input: &str) -> String {
349    let mut result = String::with_capacity(input.len());
350    let mut state = LexState::Normal;
351    let mut block_depth: usize = 0;
352    let bytes = input.as_bytes();
353    let len = bytes.len();
354    let mut i = 0;
355
356    while i < len {
357        let (ch, char_len) = decode_char_at(input, i);
358
359        match state {
360            LexState::Normal => {
361                if ch == '\'' {
362                    state = LexState::SingleQuote;
363                    result.push(ch);
364                    i += char_len;
365                } else if ch == '"' {
366                    state = LexState::DoubleQuote;
367                    result.push(ch);
368                    i += char_len;
369                } else if ch == '$' && i + 1 < len && bytes[i + 1] == b'$' {
370                    state = LexState::DollarQuote;
371                    result.push_str("$$");
372                    i += 2;
373                } else if ch == '-' && i + 1 < len && bytes[i + 1] == b'-' {
374                    // Line comment: skip to end of line
375                    state = LexState::LineComment;
376                    i += 2;
377                } else if ch == '/' && i + 1 < len && bytes[i + 1] == b'*' {
378                    // Block comment: skip content
379                    state = LexState::BlockComment;
380                    block_depth = 1;
381                    i += 2;
382                } else {
383                    result.push(ch);
384                    i += char_len;
385                }
386            }
387            LexState::SingleQuote => {
388                result.push(ch);
389                if ch == '\'' {
390                    if i + 1 < len && bytes[i + 1] == b'\'' {
391                        result.push('\'');
392                        i += 2;
393                    } else {
394                        state = LexState::Normal;
395                        i += 1;
396                    }
397                } else {
398                    i += char_len;
399                }
400            }
401            LexState::DoubleQuote => {
402                result.push(ch);
403                if ch == '"' {
404                    if i + 1 < len && bytes[i + 1] == b'"' {
405                        result.push('"');
406                        i += 2;
407                    } else {
408                        state = LexState::Normal;
409                        i += 1;
410                    }
411                } else {
412                    i += char_len;
413                }
414            }
415            LexState::DollarQuote => {
416                result.push(ch);
417                if ch == '$' && i + 1 < len && bytes[i + 1] == b'$' {
418                    result.push('$');
419                    state = LexState::Normal;
420                    i += 2;
421                } else {
422                    i += char_len;
423                }
424            }
425            LexState::LineComment => {
426                if ch == '\n' {
427                    result.push('\n');
428                    state = LexState::Normal;
429                }
430                i += char_len;
431            }
432            LexState::BlockComment => {
433                if ch == '*' && i + 1 < len && bytes[i + 1] == b'/' {
434                    block_depth -= 1;
435                    if block_depth == 0 {
436                        // Replace entire block comment with a space to avoid token merging
437                        result.push(' ');
438                        state = LexState::Normal;
439                    }
440                    i += 2;
441                } else if ch == '/' && i + 1 < len && bytes[i + 1] == b'*' {
442                    block_depth += 1;
443                    i += 2;
444                } else {
445                    i += char_len;
446                }
447            }
448        }
449    }
450
451    result
452}
453
454/// Classify a complete input as a shell command, CQL statement, or empty.
455pub fn classify_input(input: &str) -> InputKind {
456    let trimmed = input.trim();
457    if trimmed.is_empty() {
458        return InputKind::Empty;
459    }
460
461    if is_shell_command(trimmed) {
462        InputKind::ShellCommand(trimmed.to_string())
463    } else {
464        InputKind::CqlStatement(trimmed.to_string())
465    }
466}
467
468/// Check if the first line of input looks like a shell command.
469///
470/// Used by the REPL to decide whether to wait for a semicolon
471/// or dispatch immediately.
472#[must_use]
473pub fn is_shell_command(line: &str) -> bool {
474    let trimmed = line.trim();
475    // Strip trailing semicolon for command detection
476    let without_semi = trimmed.strip_suffix(';').unwrap_or(trimmed).trim();
477    let first_word = without_semi
478        .split_whitespace()
479        .next()
480        .unwrap_or("")
481        .to_uppercase();
482
483    SHELL_COMMANDS.contains(&first_word.as_str())
484}
485
486/// Parse a complete input string (e.g., from `-e` or `-f` batch mode)
487/// into individual statements.
488///
489/// Returns a vector of complete, comment-stripped statements.
490/// This is O(n) in the input size (not O(n²) per PR #151).
491#[must_use]
492pub fn parse_batch(input: &str) -> Vec<String> {
493    let mut parser = StatementParser::new();
494    let mut all_statements = Vec::new();
495
496    for line in input.lines() {
497        if let ParseResult::Complete(stmts) = parser.feed_line(line) {
498            all_statements.extend(stmts);
499        }
500    }
501
502    // Handle any remaining content without a trailing semicolon.
503    // Shell commands don't need semicolons; CQL statements do.
504    let remaining = parser.remaining().trim();
505    if !remaining.is_empty() {
506        let stripped = strip_comments(remaining);
507        let trimmed = stripped.trim();
508        if !trimmed.is_empty() && is_shell_command(trimmed) {
509            all_statements.push(trimmed.to_string());
510        }
511        // Non-shell-command without semicolon is incomplete — drop it
512        // (matches Python cqlsh batch mode behavior)
513    }
514
515    all_statements
516}
517
518#[cfg(test)]
519mod tests {
520    use super::*;
521
522    // --- Basic semicolon detection ---
523
524    #[test]
525    fn simple_statement() {
526        let mut p = StatementParser::new();
527        let result = p.feed_line("SELECT * FROM users;");
528        assert_eq!(
529            result,
530            ParseResult::Complete(vec!["SELECT * FROM users".to_string()])
531        );
532    }
533
534    #[test]
535    fn statement_with_trailing_whitespace() {
536        let mut p = StatementParser::new();
537        let result = p.feed_line("SELECT * FROM users;  ");
538        assert_eq!(
539            result,
540            ParseResult::Complete(vec!["SELECT * FROM users".to_string()])
541        );
542    }
543
544    #[test]
545    fn incomplete_no_semicolon() {
546        let mut p = StatementParser::new();
547        assert_eq!(p.feed_line("SELECT * FROM users"), ParseResult::Incomplete);
548    }
549
550    #[test]
551    fn empty_input() {
552        let mut p = StatementParser::new();
553        assert_eq!(p.feed_line(""), ParseResult::Incomplete);
554        assert_eq!(p.feed_line("   "), ParseResult::Incomplete);
555    }
556
557    // --- Single-quoted string handling ---
558
559    #[test]
560    fn semicolon_in_single_quoted_string() {
561        let mut p = StatementParser::new();
562        let result = p.feed_line("INSERT INTO t (v) VALUES ('hello;world');");
563        assert_eq!(
564            result,
565            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('hello;world')".to_string()])
566        );
567    }
568
569    #[test]
570    fn escaped_quote_in_string() {
571        let mut p = StatementParser::new();
572        let result = p.feed_line("INSERT INTO t (v) VALUES ('it''s;here');");
573        assert_eq!(
574            result,
575            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('it''s;here')".to_string()])
576        );
577    }
578
579    // --- Double-quoted identifier handling ---
580
581    #[test]
582    fn semicolon_in_double_quoted_identifier() {
583        let mut p = StatementParser::new();
584        let result = p.feed_line("SELECT \"col;name\" FROM t;");
585        assert_eq!(
586            result,
587            ParseResult::Complete(vec!["SELECT \"col;name\" FROM t".to_string()])
588        );
589    }
590
591    #[test]
592    fn escaped_double_quote() {
593        let mut p = StatementParser::new();
594        let result = p.feed_line("SELECT \"col\"\"name\" FROM t;");
595        assert_eq!(
596            result,
597            ParseResult::Complete(vec!["SELECT \"col\"\"name\" FROM t".to_string()])
598        );
599    }
600
601    // --- Dollar-quoted string handling ---
602
603    #[test]
604    fn semicolon_in_dollar_quoted_string() {
605        let mut p = StatementParser::new();
606        let result = p.feed_line("CREATE FUNCTION f() RETURNS NULL ON NULL INPUT RETURNS text LANGUAGE java AS $$return a;$$;");
607        assert_eq!(result, ParseResult::Complete(vec![
608            "CREATE FUNCTION f() RETURNS NULL ON NULL INPUT RETURNS text LANGUAGE java AS $$return a;$$".to_string()
609        ]));
610    }
611
612    #[test]
613    fn dollar_quote_multiline() {
614        let mut p = StatementParser::new();
615        assert_eq!(
616            p.feed_line("CREATE FUNCTION f() RETURNS text LANGUAGE java AS $$"),
617            ParseResult::Incomplete
618        );
619        assert_eq!(p.feed_line("  return a;"), ParseResult::Incomplete);
620        let result = p.feed_line("$$;");
621        assert!(matches!(result, ParseResult::Complete(_)));
622    }
623
624    #[test]
625    fn empty_dollar_quote() {
626        let mut p = StatementParser::new();
627        let result = p.feed_line("SELECT $$$$;");
628        assert_eq!(
629            result,
630            ParseResult::Complete(vec!["SELECT $$$$".to_string()])
631        );
632    }
633
634    // --- Line comment stripping ---
635
636    #[test]
637    fn line_comment_stripped() {
638        let mut p = StatementParser::new();
639        let result = p.feed_line("SELECT * FROM t; -- this is a comment");
640        assert_eq!(
641            result,
642            ParseResult::Complete(vec!["SELECT * FROM t".to_string()])
643        );
644    }
645
646    #[test]
647    fn line_comment_does_not_terminate() {
648        let mut p = StatementParser::new();
649        // Semicolon inside line comment should not terminate
650        assert_eq!(
651            p.feed_line("SELECT * FROM t -- comment with ;"),
652            ParseResult::Incomplete
653        );
654    }
655
656    #[test]
657    fn line_comment_then_statement_across_lines() {
658        let mut p = StatementParser::new();
659        assert_eq!(p.feed_line("-- header comment"), ParseResult::Incomplete);
660        let result = p.feed_line("SELECT 1;");
661        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
662    }
663
664    // --- Block comment stripping (PR #150) ---
665
666    #[test]
667    fn block_comment_stripped() {
668        let mut p = StatementParser::new();
669        let result = p.feed_line("SELECT /* comment */ * FROM t;");
670        assert_eq!(
671            result,
672            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
673        );
674    }
675
676    #[test]
677    fn block_comment_with_semicolon() {
678        let mut p = StatementParser::new();
679        // Semicolon inside block comment should not terminate
680        let result = p.feed_line("SELECT /* ; */ * FROM t;");
681        assert_eq!(
682            result,
683            ParseResult::Complete(vec!["SELECT   * FROM t".to_string()])
684        );
685    }
686
687    #[test]
688    fn block_comment_chars_in_single_quoted_string() {
689        // PR #150: /* inside strings must NOT be treated as comment start
690        let mut p = StatementParser::new();
691        let result = p.feed_line("INSERT INTO t (v) VALUES ('/* not a comment */');");
692        assert_eq!(
693            result,
694            ParseResult::Complete(vec![
695                "INSERT INTO t (v) VALUES ('/* not a comment */')".to_string()
696            ])
697        );
698    }
699
700    #[test]
701    fn block_comment_chars_in_double_quoted_string() {
702        let mut p = StatementParser::new();
703        let result = p.feed_line("SELECT \"/* not a comment */\" FROM t;");
704        assert_eq!(
705            result,
706            ParseResult::Complete(vec!["SELECT \"/* not a comment */\" FROM t".to_string()])
707        );
708    }
709
710    #[test]
711    fn block_comment_chars_in_dollar_quoted_string() {
712        let mut p = StatementParser::new();
713        let result = p.feed_line("SELECT $$/* not a comment */$$;");
714        assert_eq!(
715            result,
716            ParseResult::Complete(vec!["SELECT $$/* not a comment */$$".to_string()])
717        );
718    }
719
720    #[test]
721    fn block_comment_across_feed_lines() {
722        let mut p = StatementParser::new();
723        assert_eq!(p.feed_line("SELECT /* start"), ParseResult::Incomplete);
724        assert_eq!(p.feed_line("still comment"), ParseResult::Incomplete);
725        let result = p.feed_line("end */ 1;");
726        assert_eq!(
727            result,
728            ParseResult::Complete(vec!["SELECT   1".to_string()])
729        );
730    }
731
732    #[test]
733    fn nested_block_comments() {
734        let mut p = StatementParser::new();
735        let result = p.feed_line("SELECT /* outer /* inner */ still comment */ 1;");
736        assert_eq!(
737            result,
738            ParseResult::Complete(vec!["SELECT   1".to_string()])
739        );
740    }
741
742    #[test]
743    fn nested_block_comments_stripped() {
744        let input = "SELECT /* outer /* inner */ still */ 1";
745        let result = strip_comments(input);
746        assert_eq!(result, "SELECT   1");
747    }
748
749    // --- Multi-line statement buffering ---
750
751    #[test]
752    fn multiline_statement() {
753        let mut p = StatementParser::new();
754        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
755        assert_eq!(p.feed_line("FROM users"), ParseResult::Incomplete);
756        let result = p.feed_line("WHERE id = 1;");
757        assert_eq!(
758            result,
759            ParseResult::Complete(vec!["SELECT *\nFROM users\nWHERE id = 1".to_string()])
760        );
761    }
762
763    #[test]
764    fn multiline_with_string_across_lines() {
765        let mut p = StatementParser::new();
766        assert_eq!(
767            p.feed_line("INSERT INTO t (v) VALUES ('hello"),
768            ParseResult::Incomplete
769        );
770        let result = p.feed_line("world');");
771        assert_eq!(
772            result,
773            ParseResult::Complete(vec!["INSERT INTO t (v) VALUES ('hello\nworld')".to_string()])
774        );
775    }
776
777    // --- Empty statement handling ---
778
779    #[test]
780    fn empty_statement_skipped() {
781        let mut p = StatementParser::new();
782        let result = p.feed_line(";;");
783        // Both semicolons produce empty statements which are skipped
784        assert_eq!(result, ParseResult::Incomplete);
785    }
786
787    #[test]
788    fn empty_between_statements() {
789        let mut p = StatementParser::new();
790        let result = p.feed_line("SELECT 1; ; SELECT 2;");
791        assert_eq!(
792            result,
793            ParseResult::Complete(vec!["SELECT 1".to_string(), "SELECT 2".to_string(),])
794        );
795    }
796
797    // --- Built-in command detection ---
798
799    #[test]
800    fn shell_commands_detected() {
801        assert!(is_shell_command("HELP"));
802        assert!(is_shell_command("?"));
803        assert!(is_shell_command("QUIT"));
804        assert!(is_shell_command("EXIT"));
805        assert!(is_shell_command("DESCRIBE KEYSPACES"));
806        assert!(is_shell_command("DESC TABLE users"));
807        assert!(is_shell_command("CONSISTENCY ONE"));
808        assert!(is_shell_command("TRACING ON"));
809        assert!(is_shell_command("EXPAND ON"));
810        assert!(is_shell_command("PAGING 100"));
811        assert!(is_shell_command("SHOW VERSION"));
812        assert!(is_shell_command("CLEAR"));
813        assert!(is_shell_command("CLS"));
814        assert!(is_shell_command("COPY users TO '/tmp/data.csv'"));
815        assert!(is_shell_command("USE my_keyspace"));
816    }
817
818    #[test]
819    fn shell_command_case_insensitive() {
820        assert!(is_shell_command("help"));
821        assert!(is_shell_command("quit"));
822        assert!(is_shell_command("Help"));
823        assert!(is_shell_command("describe keyspaces"));
824        assert!(is_shell_command("use my_ks"));
825    }
826
827    #[test]
828    fn shell_command_with_semicolon() {
829        assert!(is_shell_command("USE my_ks;"));
830        assert!(is_shell_command("HELP;"));
831    }
832
833    #[test]
834    fn cql_not_shell_command() {
835        assert!(!is_shell_command("SELECT * FROM users"));
836        assert!(!is_shell_command("INSERT INTO t (id) VALUES (1)"));
837        assert!(!is_shell_command("CREATE TABLE test (id int PRIMARY KEY)"));
838    }
839
840    // --- Command classification ---
841
842    #[test]
843    fn classify_shell_command() {
844        assert_eq!(
845            classify_input("HELP"),
846            InputKind::ShellCommand("HELP".to_string())
847        );
848        assert_eq!(
849            classify_input("USE my_ks"),
850            InputKind::ShellCommand("USE my_ks".to_string())
851        );
852    }
853
854    #[test]
855    fn classify_shell_command_with_semicolon() {
856        assert_eq!(
857            classify_input("USE my_ks;"),
858            InputKind::ShellCommand("USE my_ks;".to_string())
859        );
860    }
861
862    #[test]
863    fn classify_cql_statement() {
864        assert_eq!(
865            classify_input("SELECT * FROM users"),
866            InputKind::CqlStatement("SELECT * FROM users".to_string())
867        );
868    }
869
870    #[test]
871    fn classify_empty() {
872        assert_eq!(classify_input(""), InputKind::Empty);
873        assert_eq!(classify_input("   "), InputKind::Empty);
874    }
875
876    // --- Multiple statements on one line ---
877
878    #[test]
879    fn multiple_statements_one_line() {
880        let mut p = StatementParser::new();
881        let result = p.feed_line("SELECT 1; SELECT 2; SELECT 3;");
882        assert_eq!(
883            result,
884            ParseResult::Complete(vec![
885                "SELECT 1".to_string(),
886                "SELECT 2".to_string(),
887                "SELECT 3".to_string(),
888            ])
889        );
890    }
891
892    // --- Whitespace normalization ---
893
894    #[test]
895    fn leading_trailing_whitespace_trimmed() {
896        let mut p = StatementParser::new();
897        let result = p.feed_line("  SELECT * FROM t  ;  ");
898        assert_eq!(
899            result,
900            ParseResult::Complete(vec!["SELECT * FROM t".to_string()])
901        );
902    }
903
904    // --- Batch mode parsing ---
905
906    #[test]
907    fn parse_batch_basic() {
908        let input = "SELECT 1;\nSELECT 2;\n";
909        let stmts = parse_batch(input);
910        assert_eq!(stmts, vec!["SELECT 1", "SELECT 2"]);
911    }
912
913    #[test]
914    fn parse_batch_with_comments() {
915        let input = "-- header comment\nSELECT 1; -- inline\nSELECT /* x */ 2;\n";
916        let stmts = parse_batch(input);
917        assert_eq!(stmts, vec!["SELECT 1", "SELECT   2"]);
918    }
919
920    #[test]
921    fn parse_batch_multiline_statement() {
922        let input = "SELECT *\nFROM users\nWHERE id = 1;\n";
923        let stmts = parse_batch(input);
924        assert_eq!(stmts, vec!["SELECT *\nFROM users\nWHERE id = 1"]);
925    }
926
927    #[test]
928    fn parse_batch_with_shell_command() {
929        let input = "SELECT 1;\nUSE my_ks\n";
930        let stmts = parse_batch(input);
931        assert_eq!(stmts, vec!["SELECT 1", "USE my_ks"]);
932    }
933
934    #[test]
935    fn parse_batch_drops_incomplete_cql() {
936        // CQL without semicolon at end of file is dropped (Python cqlsh behavior)
937        let input = "SELECT 1;\nSELECT 2";
938        let stmts = parse_batch(input);
939        assert_eq!(stmts, vec!["SELECT 1"]);
940    }
941
942    #[test]
943    fn parse_batch_only_comments() {
944        let input = "-- just a comment\n/* block */\n";
945        let stmts = parse_batch(input);
946        assert!(stmts.is_empty());
947    }
948
949    // --- Comment stripping edge cases ---
950
951    #[test]
952    fn strip_comments_preserves_strings() {
953        let input = "SELECT '-- not a comment' FROM t";
954        let result = strip_comments(input);
955        assert_eq!(result, "SELECT '-- not a comment' FROM t");
956    }
957
958    #[test]
959    fn strip_comments_preserves_dollar_strings() {
960        let input = "SELECT $$-- not a comment$$ FROM t";
961        let result = strip_comments(input);
962        assert_eq!(result, "SELECT $$-- not a comment$$ FROM t");
963    }
964
965    #[test]
966    fn strip_comments_multiline_block() {
967        let input = "SELECT /* multi\nline\ncomment */ 1";
968        let result = strip_comments(input);
969        // Block comment is replaced with a single space, plus the existing space = "  "
970        assert_eq!(result, "SELECT   1");
971    }
972
973    // --- Parser reset ---
974
975    #[test]
976    fn reset_clears_state() {
977        let mut p = StatementParser::new();
978        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
979        assert!(!p.is_empty());
980
981        p.reset();
982        assert!(p.is_empty());
983
984        // After reset, should start fresh
985        let result = p.feed_line("SELECT 1;");
986        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
987    }
988
989    // --- Parser reuse after Complete ---
990
991    #[test]
992    fn reuse_after_complete() {
993        let mut p = StatementParser::new();
994        let r1 = p.feed_line("SELECT 1;");
995        assert_eq!(r1, ParseResult::Complete(vec!["SELECT 1".to_string()]));
996
997        // Parser should work for subsequent statements
998        let r2 = p.feed_line("SELECT 2;");
999        assert_eq!(r2, ParseResult::Complete(vec!["SELECT 2".to_string()]));
1000    }
1001
1002    #[test]
1003    fn reuse_after_complete_multiline() {
1004        let mut p = StatementParser::new();
1005        assert_eq!(
1006            p.feed_line("SELECT 1;"),
1007            ParseResult::Complete(vec!["SELECT 1".to_string()])
1008        );
1009
1010        // Now a multi-line statement
1011        assert_eq!(p.feed_line("SELECT *"), ParseResult::Incomplete);
1012        let result = p.feed_line("FROM t;");
1013        assert_eq!(
1014            result,
1015            ParseResult::Complete(vec!["SELECT *\nFROM t".to_string()])
1016        );
1017    }
1018
1019    // --- Unterminated constructs ---
1020
1021    #[test]
1022    fn unterminated_string_blocks_semicolon() {
1023        let stmts = parse_batch("SELECT 'unterminated;");
1024        assert!(stmts.is_empty());
1025    }
1026
1027    #[test]
1028    fn unterminated_block_comment_blocks_semicolon() {
1029        let stmts = parse_batch("SELECT /* never closed;");
1030        assert!(stmts.is_empty());
1031    }
1032
1033    // --- Backslash in strings ---
1034
1035    #[test]
1036    fn backslash_in_string_is_literal() {
1037        // CQL does NOT use backslash escaping (uses '' instead)
1038        let mut p = StatementParser::new();
1039        let result = p.feed_line("SELECT '\\';");
1040        assert_eq!(
1041            result,
1042            ParseResult::Complete(vec!["SELECT '\\'".to_string()])
1043        );
1044    }
1045
1046    // --- Unicode handling ---
1047
1048    #[test]
1049    fn unicode_in_strings() {
1050        let mut p = StatementParser::new();
1051        let result = p.feed_line("INSERT INTO t (v) VALUES ('héllo wörld; café');");
1052        assert_eq!(
1053            result,
1054            ParseResult::Complete(vec![
1055                "INSERT INTO t (v) VALUES ('héllo wörld; café')".to_string()
1056            ])
1057        );
1058    }
1059
1060    #[test]
1061    fn unicode_identifier() {
1062        let mut p = StatementParser::new();
1063        let result = p.feed_line("SELECT \"naïve;col\" FROM t;");
1064        assert_eq!(
1065            result,
1066            ParseResult::Complete(vec!["SELECT \"naïve;col\" FROM t".to_string()])
1067        );
1068    }
1069
1070    // --- Incremental scan correctness ---
1071
1072    #[test]
1073    fn incremental_scan_preserves_state_across_lines() {
1074        // Verify that the parser doesn't re-scan from the start each time.
1075        // This is a correctness test: if state weren't preserved,
1076        // the second line's `'` would start a new string context.
1077        let mut p = StatementParser::new();
1078        assert_eq!(
1079            p.feed_line("INSERT INTO t VALUES ('multi"),
1080            ParseResult::Incomplete
1081        );
1082        assert_eq!(
1083            p.feed_line("line string with ; inside"),
1084            ParseResult::Incomplete
1085        );
1086        let result = p.feed_line("end of string');");
1087        assert_eq!(
1088            result,
1089            ParseResult::Complete(vec![
1090                "INSERT INTO t VALUES ('multi\nline string with ; inside\nend of string')"
1091                    .to_string()
1092            ])
1093        );
1094    }
1095
1096    // --- BUG-7: Inline comment after semicolon ---
1097
1098    #[test]
1099    fn inline_comment_after_semicolon_clears_buffer() {
1100        let mut p = StatementParser::new();
1101        let result = p.feed_line("SELECT 1; -- inline comment");
1102        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1103        // Parser should be empty — no continuation prompt
1104        assert!(p.is_empty());
1105    }
1106
1107    #[test]
1108    fn inline_comment_after_semicolon_next_statement_works() {
1109        let mut p = StatementParser::new();
1110        let r1 = p.feed_line("SELECT 1; -- comment");
1111        assert_eq!(r1, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1112        assert!(p.is_empty());
1113
1114        // Next statement should work normally
1115        let r2 = p.feed_line("SELECT 2;");
1116        assert_eq!(r2, ParseResult::Complete(vec!["SELECT 2".to_string()]));
1117    }
1118
1119    // --- BUG-8: Bare ;; enters continuation ---
1120
1121    #[test]
1122    fn bare_semicolons_clear_buffer() {
1123        let mut p = StatementParser::new();
1124        let result = p.feed_line(";;");
1125        assert_eq!(result, ParseResult::Incomplete);
1126        // Parser should be empty — no continuation prompt
1127        assert!(p.is_empty());
1128    }
1129
1130    #[test]
1131    fn bare_semicolons_then_statement() {
1132        let mut p = StatementParser::new();
1133        assert_eq!(p.feed_line(";;"), ParseResult::Incomplete);
1134        assert!(p.is_empty());
1135
1136        let result = p.feed_line("SELECT 1;");
1137        assert_eq!(result, ParseResult::Complete(vec!["SELECT 1".to_string()]));
1138    }
1139
1140    #[test]
1141    fn only_whitespace_and_comments_clears_buffer() {
1142        let mut p = StatementParser::new();
1143        assert_eq!(p.feed_line("-- just a comment"), ParseResult::Incomplete);
1144        assert!(p.is_empty());
1145    }
1146
1147    #[test]
1148    fn block_comment_only_clears_buffer() {
1149        let mut p = StatementParser::new();
1150        assert_eq!(p.feed_line("/* block comment */"), ParseResult::Incomplete);
1151        assert!(p.is_empty());
1152    }
1153}