Skip to content
Merged
2 changes: 1 addition & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ pub use self::trigger::{

pub use self::value::{
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
NormalizationForm, TrimWhereField, Value, ValueWithSpan,
NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan,
};

use crate::ast::helpers::key_value_options::KeyValueOptions;
Expand Down
32 changes: 32 additions & 0 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ pub enum Value {
TripleDoubleQuotedRawStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
QuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "National" quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
/// X'hex value'
HexStringLiteral(String),

Expand Down Expand Up @@ -207,6 +213,8 @@ impl Value {
| Value::NationalStringLiteral(s)
| Value::HexStringLiteral(s) => Some(s),
Value::DollarQuotedString(s) => Some(s.value),
Value::QuoteDelimitedStringLiteral(s) => Some(s.value),
Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value),
_ => None,
}
}
Expand Down Expand Up @@ -242,6 +250,8 @@ impl fmt::Display for Value {
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{v}'"),
Value::QuoteDelimitedStringLiteral(v) => v.fmt(f),
Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"),
Value::HexStringLiteral(v) => write!(f, "X'{v}'"),
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
Expand Down Expand Up @@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString {
}
}

/// A quote delimited string literal, e.g. `Q'_abc_'`.
///
/// See [Value::QuoteDelimitedStringLiteral] and/or
/// [Value::NationalQuoteDelimitedStringLiteral].
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct QuoteDelimitedString {
/// the quote start character; i.e. the character _after_ the opening `Q'`
pub start_quote: char,
/// the string literal value itself
pub value: String,
/// the quote end character; i.e. the character _before_ the closing `'`
pub end_quote: char,
}

impl fmt::Display for QuoteDelimitedString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote)
}
}

#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,8 @@ impl Dialect for GenericDialect {
fn supports_interval_options(&self) -> bool {
true
}

fn supports_quote_delimited_string(&self) -> bool {
true
}
}
7 changes: 7 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any {
fn supports_semantic_view_table_factor(&self) -> bool {
false
}

/// Support quote delimited string literals, e.g. `Q'{...}'`
///
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
fn supports_quote_delimited_string(&self) -> bool {
false
}
}

/// This represents the operators for which precedence must be defined
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/oracle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,4 +95,8 @@ impl Dialect for OracleDialect {
fn supports_group_by_expr(&self) -> bool {
true
}

fn supports_quote_delimited_string(&self) -> bool {
true
}
}
2 changes: 1 addition & 1 deletion src/parser/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//! SQL Parser for a `MERGE` statement

#[cfg(not(feature = "std"))]
use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec};
use alloc::{boxed::Box, format, vec, vec::Vec};

use crate::{
ast::{
Expand Down
10 changes: 10 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> {
| Token::TripleSingleQuotedRawStringLiteral(_)
| Token::TripleDoubleQuotedRawStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_)
| Token::NationalQuoteDelimitedStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
Expand Down Expand Up @@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> {
| Token::EscapedStringLiteral(_)
| Token::UnicodeStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::QuoteDelimitedStringLiteral(_)
| Token::NationalQuoteDelimitedStringLiteral(_)
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
_ => self.expected(
"either filler, WITH, or WITHOUT in LISTAGG",
Expand Down Expand Up @@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> {
Token::NationalStringLiteral(ref s) => {
ok_value(Value::NationalStringLiteral(s.to_string()))
}
Token::QuoteDelimitedStringLiteral(v) => {
ok_value(Value::QuoteDelimitedStringLiteral(v))
}
Token::NationalQuoteDelimitedStringLiteral(v) => {
ok_value(Value::NationalQuoteDelimitedStringLiteral(v))
}
Token::EscapedStringLiteral(ref s) => {
ok_value(Value::EscapedStringLiteral(s.to_string()))
}
Expand Down
92 changes: 90 additions & 2 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ use alloc::{
vec,
vec::Vec,
};
use core::iter::Peekable;
use core::num::NonZeroU8;
use core::str::Chars;
use core::{cmp, fmt};
use core::{iter::Peekable, str};

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
Expand All @@ -46,7 +46,10 @@ use crate::dialect::{
SnowflakeDialect,
};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
use crate::{ast::DollarQuotedString, dialect::HiveDialect};
use crate::{
ast::{DollarQuotedString, QuoteDelimitedString},
dialect::HiveDialect,
};

/// SQL Token enumeration
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
Expand Down Expand Up @@ -98,6 +101,12 @@ pub enum Token {
TripleDoubleQuotedRawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
QuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "Nationa" quote delimited literal. Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'`
/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA)
NationalQuoteDelimitedStringLiteral(QuoteDelimitedString),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
EscapedStringLiteral(String),
/// Unicode string literal: i.e: U&'first \000A second'
Expand Down Expand Up @@ -292,6 +301,8 @@ impl fmt::Display for Token {
Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""),
Token::DollarQuotedString(ref s) => write!(f, "{s}"),
Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"),
Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f),
Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"),
Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"),
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
Expand Down Expand Up @@ -1032,13 +1043,35 @@ impl<'a> Tokenizer<'a> {
self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?;
Ok(Some(Token::NationalStringLiteral(s)))
}
Some(&q @ 'q') | Some(&q @ 'Q')
if self.dialect.supports_quote_delimited_string() =>
{
chars.next(); // consume and check the next char
if let Some('\'') = chars.peek() {
self.tokenize_quote_delimited_string(chars, &[n, q])
.map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s)))
} else {
let s = self.tokenize_word(String::from_iter([n, q]), chars);
Ok(Some(Token::make_word(&s, None)))
}
}
_ => {
// regular identifier starting with an "N"
let s = self.tokenize_word(n, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => {
chars.next(); // consume and check the next char
if let Some('\'') = chars.peek() {
self.tokenize_quote_delimited_string(chars, &[q])
.map(|s| Some(Token::QuoteDelimitedStringLiteral(s)))
} else {
let s = self.tokenize_word(q, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => {
let starting_loc = chars.location();
Expand Down Expand Up @@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> {
)
}

/// Reads a quote delimited string expecting `chars.next()` to deliver a quote.
///
/// See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA>
fn tokenize_quote_delimited_string(
&self,
chars: &mut State,
// the prefix that introduced the possible literal or word,
// e.g. "Q" or "nq"
literal_prefix: &[char],
) -> Result<QuoteDelimitedString, TokenizerError> {
let literal_start_loc = chars.location();
chars.next();

let start_quote_loc = chars.location();
let (start_quote, end_quote) = match chars.next() {
None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => {
return self.tokenizer_error(
start_quote_loc,
format!(
"Invalid space, tab, newline, or EOF after '{}''",
String::from_iter(literal_prefix)
),
);
}
Some(c) => (
c,
match c {
'[' => ']',
'{' => '}',
'<' => '>',
'(' => ')',
c => c,
},
),
};

// read the string literal until the "quote character" following a by literal quote
let mut value = String::new();
while let Some(ch) = chars.next() {
if ch == end_quote {
if let Some('\'') = chars.peek() {
chars.next(); // ~ consume the quote
return Ok(QuoteDelimitedString {
start_quote,
value,
end_quote,
});
}
}
value.push(ch);
}

self.tokenizer_error(literal_start_loc, "Unterminated string literal")
}

/// Read a quoted string.
fn tokenize_quoted_string(
&self,
Expand Down
Loading