Python source code examples: sqlparse.sql.Identifier()
Example 1

def parse_partial_identifier(word):
    """Attempt to parse a (partially typed) word as an identifier

    word may include a schema qualification, like `schema_name.partial_name`
    or `schema_name.`. There may also be unclosed quotation marks, like
    `"schema`, or `schema."partial_name`

    :param word: string representing a (partially complete) identifier
    :return: sqlparse.sql.Identifier, or None
    """
    p = sqlparse.parse(word)[0]
    n_tok = len(p.tokens)
    if n_tok == 1 and isinstance(p.tokens[0], Identifier):
        return p.tokens[0]
    elif p.token_next_by(m=(Error, '"'))[1]:
        # An unmatched double quote, e.g. '"foo', 'foo."', or 'foo."bar'
        # Close the double quote, then reparse
        return parse_partial_identifier(word + '"')
    else:
        return None
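
A quick sketch of how this helper behaves, with illustrative inputs and the function above (plus its imports) assumed to be in scope:

import sqlparse
from sqlparse.sql import Identifier
from sqlparse.tokens import Error

# the dangling quote in 'schema."par' is closed, then reparsed
ident = parse_partial_identifier('schema."par')
print(ident.get_parent_name(), ident.get_real_name())  # expected: schema par
print(parse_partial_identifier('select'))  # expected: None (parses as a keyword)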
Example 2

def sql_literal_to_model(tok, m=M):
    """
    :param m: the source model to "append" the literal to.
        defaults to M - the sqlitis models module (which means a fresh model
        is created)
    :return: the resulting model
    """
    def is_string_literal(tok):
        text = tok.normalized
        return all([text.startswith('"'), text.endswith('"')])

    # sqlparse treats string literals as identifiers
    if type(tok) is S.Identifier and is_string_literal(tok):
        return m.Field(tok.normalized, literal=True)
    elif type(tok) is S.Identifier:
        return m.Field(tok.normalized)
    elif tok.ttype is T.Comparison:
        return m.Op(tok.normalized)
    elif tok.ttype in [T.Literal, T.String, T.Number, T.Number.Integer, T.Number.Float]:
        return m.Field(tok.normalized, literal=True)

    return None
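
The string-literal special case exists because sqlparse wraps double-quoted strings in Identifier nodes. A minimal illustration (assumes a standard sqlparse install):

import sqlparse

stmt = sqlparse.parse('select "a literal"')[0]
tok = stmt.tokens[2]
print(type(tok).__name__, tok.normalized)  # expected: Identifier "a literal"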
Example 3

def _extract_table_identifiers(token_stream):
    for item in token_stream:
        if isinstance(item, IdentifierList):
            for ident in item.get_identifiers():
                try:
                    alias = ident.get_alias()
                    schema_name = ident.get_parent_name()
                    real_name = ident.get_real_name()
                except AttributeError:
                    continue
                if real_name:
                    yield Reference(schema_name, real_name,
                                    alias, _identifier_is_function(ident))
        elif isinstance(item, Identifier):
            yield Reference(item.get_parent_name(), item.get_real_name(),
                            item.get_alias(), _identifier_is_function(item))
        elif isinstance(item, Function):
            yield Reference(item.get_parent_name(), item.get_real_name(),
                            item.get_alias(), _identifier_is_function(item))
Example 4

def group_period(tlist):
    def match(token):
        return token.match(T.Punctuation, '.')

    def valid_prev(token):
        sqlcls = sql.SquareBrackets, sql.Identifier
        ttypes = T.Name, T.String.Symbol
        return imt(token, i=sqlcls, t=ttypes)

    def valid_next(token):
        # issue261, allow invalid next token
        return True

    def post(tlist, pidx, tidx, nidx):
        # next_ validation is being performed here. issue261
        sqlcls = sql.SquareBrackets, sql.Function
        ttypes = T.Name, T.String.Symbol, T.Wildcard
        next_ = tlist[nidx] if nidx is not None else None
        valid_next = imt(next_, i=sqlcls, t=ttypes)

        return (pidx, nidx) if valid_next else (pidx, tidx)

    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
Example 5

def group_comparison(tlist):
    sqlcls = (sql.Parenthesis, sql.Function, sql.Identifier,
              sql.Operation)
    ttypes = T_NUMERICAL + T_STRING + T_NAME

    def match(token):
        return token.ttype == T.Operator.Comparison

    def valid(token):
        if imt(token, t=ttypes, i=sqlcls):
            return True
        elif token and token.is_keyword and token.normalized == 'NULL':
            return True
        else:
            return False

    def post(tlist, pidx, tidx, nidx):
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.Comparison, match,
           valid_prev, valid_next, post, extend=False)
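
The effect of this grouping is visible in a WHERE clause: sql.Comparison exposes the two sides as left and right. A small sketch:

import sqlparse
from sqlparse.sql import Comparison

stmt = sqlparse.parse('select * from t where a = 1')[0]
where = stmt.tokens[-1]
comp = next(t for t in where.tokens if isinstance(t, Comparison))
print(comp.left, comp.right)  # expected: a 1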
Example 6

def group_arrays(tlist):
    sqlcls = sql.SquareBrackets, sql.Identifier, sql.Function
    ttypes = T.Name, T.String.Symbol

    def match(token):
        return isinstance(token, sql.SquareBrackets)

    def valid_prev(token):
        return imt(token, i=sqlcls, t=ttypes)

    def valid_next(token):
        return True

    def post(tlist, pidx, tidx, nidx):
        return pidx, tidx

    _group(tlist, sql.Identifier, match,
           valid_prev, valid_next, post, extend=True, recurse=False)
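
After this pass, a subscripted name like col[1] should end up as one Identifier whose children include a SquareBrackets group. A sketch, not verified against every sqlparse version:

import sqlparse
from sqlparse.sql import SquareBrackets

stmt = sqlparse.parse('select col[1] from t')[0]
ident = stmt.tokens[2]
print(type(ident).__name__)  # expected: Identifier
print(any(isinstance(t, SquareBrackets) for t in ident.tokens))  # expected: True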
Example 7

def group_operator(tlist):
    ttypes = T_NUMERICAL + T_STRING + T_NAME
    sqlcls = (sql.SquareBrackets, sql.Parenthesis, sql.Function,
              sql.Identifier, sql.Operation)

    def match(token):
        return imt(token, t=(T.Operator, T.Wildcard))

    def valid(token):
        return imt(token, i=sqlcls, t=ttypes)

    def post(tlist, pidx, tidx, nidx):
        tlist[tidx].ttype = T.Operator
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.Operation, match,
           valid_prev, valid_next, post, extend=False)
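
Once arithmetic tokens are grouped this way, an expression such as a + b should surface as a single sql.Operation node:

import sqlparse

stmt = sqlparse.parse('select a + b from t')[0]
print(type(stmt.tokens[2]).__name__)  # expected: Operation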
Example 8

def group_identifier_list(tlist):
    m_role = T.Keyword, ('null', 'role')
    sqlcls = (sql.Function, sql.Case, sql.Identifier, sql.Comparison,
              sql.IdentifierList, sql.Operation)
    ttypes = (T_NUMERICAL + T_STRING + T_NAME +
              (T.Keyword, T.Comment, T.Wildcard))

    def match(token):
        return token.match(T.Punctuation, ',')

    def valid(token):
        return imt(token, i=sqlcls, m=m_role, t=ttypes)

    def post(tlist, pidx, tidx, nidx):
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.IdentifierList, match,
           valid_prev, valid_next, post, extend=True)
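
A comma-separated select list then arrives as one sql.IdentifierList, which is what the extract_table_identifiers variants elsewhere on this page iterate over:

import sqlparse

stmt = sqlparse.parse('select a, b, c from t')[0]
ident_list = stmt.tokens[2]
print(type(ident_list).__name__)  # expected: IdentifierList
print([str(i) for i in ident_list.get_identifiers()])  # expected: ['a', 'b', 'c']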
Example 9

def group_aliased(tlist):
    clss = (sql.Identifier, sql.Function, sql.Case)
    [group_aliased(sgroup) for sgroup in tlist.get_sublists()
     if not isinstance(sgroup, clss)]
    idx = 0
    token = tlist.token_next_by_instance(idx, clss)
    while token:
        next_ = tlist.token_next(tlist.token_index(token))
        if next_ is not None and isinstance(next_, clss):
            if not next_.value.upper().startswith('VARCHAR'):
                grp = tlist.tokens_between(token, next_)[1:]
                token.tokens.extend(grp)
                for t in grp:
                    tlist.tokens.remove(t)
        idx = tlist.token_index(token) + 1
        token = tlist.token_next_by_instance(idx, clss)
Example 10

def pop(self):
    next_val = self.peek()
    self.index += 1

    # We need to handle three cases here where the next_val could be:
    # 1. <table_name> ('business')
    # 2. <database_name>.<table_name> ('yelp.business')
    # 3. <database_name>.<table_name> <extended_query>
    #    ('yelp.business change col_one col_two')
    # In all the cases we should return a token consisting of only the table
    # name, or if the database name is present then the database name and the
    # table name. Case #3 occurs because SQLParse incorrectly parses certain
    # queries.
    if isinstance(next_val, Identifier):
        tokens = next_val.tokens
        if len(tokens) > 1 and tokens[1].value == '.':
            str_token = "{db_name}{punctuation}{table_name}".format(
                db_name=tokens[0].value,
                punctuation=tokens[1].value,
                table_name=tokens[2].value
            )
            return TK(Token.Name, str_token)
        else:
            return next_val.token_first()
    return next_val
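
The tokens[1].value == '.' test relies on how sqlparse lays out a dotted name inside an Identifier, which is easy to confirm directly:

import sqlparse

ident = sqlparse.parse('yelp.business')[0].tokens[0]
print([t.value for t in ident.tokens])  # expected: ['yelp', '.', 'business']
print(ident.token_first().value)  # expected: yelp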
Example 11

def group_aliased(tlist):
    clss = (sql.Identifier, sql.Function, sql.Case)
    [group_aliased(sgroup) for sgroup in tlist.get_sublists()
     if not isinstance(sgroup, clss)]
    idx = 0
    token = tlist.token_next_by_instance(idx, clss)
    while token:
        next_ = tlist.token_next(tlist.token_index(token))
        if next_ is not None and isinstance(next_, clss):
            # for jython str.upper()
            # if not next_.value.upper().startswith('VARCHAR'):
            text = next_.value
            if sys.version_info[0] < 3 and isinstance(text, str):
                text = text.decode('utf-8').upper().encode('utf-8')
            else:
                # uppercase on Python 3 as well, matching the commented-out
                # original above
                text = text.upper()
            if not text.startswith('VARCHAR'):
                grp = tlist.tokens_between(token, next_)[1:]
                token.tokens.extend(grp)
                for t in grp:
                    tlist.tokens.remove(t)
        idx = tlist.token_index(token) + 1
        token = tlist.token_next_by_instance(idx, clss)
Example 12

def __custom_process_parenthesis_order(self, parenthesis):
    open_punc = parenthesis.token_next_match(0, T.Punctuation, '(')
    close_punc = parenthesis.token_next_match(open_punc, T.Punctuation, ')')

    self.indent += 2
    parenthesis.insert_after(open_punc, self.nl())

    for token in parenthesis.tokens_between(open_punc, close_punc)[1:-1]:
        if isinstance(token, Phrase):
            parenthesis.insert_before(token, self.nl())
            self._process_phrase(token, kwds=False)
            parenthesis.insert_after(token, self.nl_with_indent(1))
        elif isinstance(token, sql.Identifier) and len(token.tokens) == 1 \
                and isinstance(token.tokens[0], Phrase):
            # an Identifier whose single child is a Phrase
            child_token = token.tokens[0]
            parenthesis.insert_before(token, self.nl())
            self._process_phrase(child_token, kwds=False)
            parenthesis.insert_after(token, self.nl_with_indent(1))
        elif token.is_group():
            self._process(token)

    self.indent -= 1
    parenthesis.insert_before(close_punc, self.nl())
    self.indent -= 1
Example 13

def init_group_token(self, token):
    tokens = token.get_target_tokens()
    with_token = tokens[0]
    start_prev = with_token
    end = None
    for tkn in tokens[1:]:
        if tu.is_comma(tkn):
            start = tu.token_next_enable(token, start_prev)
            token.group_tokens(sql.Identifier, token.tokens_between(start, end))
            start_prev = tkn
            continue
        end = tkn

    start = tu.token_next_enable(token, with_token)
    end = tu.token_prev_enable(token)
    token.group_tokens(sql.IdentifierList, token.tokens_between(start, end))
Example 14

def _validate_order_by_and_generate_token(cls, order_by):
    try:
        parsed = sqlparse.parse(order_by)
    except Exception:
        raise MlflowException(f"Error on parsing order_by clause '{order_by}'",
                              error_code=INVALID_PARAMETER_VALUE)
    if len(parsed) != 1 or not isinstance(parsed[0], Statement):
        raise MlflowException(f"Invalid order_by clause '{order_by}'. Could not be parsed.",
                              error_code=INVALID_PARAMETER_VALUE)
    statement = parsed[0]
    if len(statement.tokens) == 1 and isinstance(statement.tokens[0], Identifier):
        token_value = statement.tokens[0].value
    elif len(statement.tokens) == 1 and \
            statement.tokens[0].match(ttype=TokenType.Keyword,
                                      values=[cls.ORDER_BY_KEY_TIMESTAMP]):
        token_value = cls.ORDER_BY_KEY_TIMESTAMP
    elif statement.tokens[0].match(ttype=TokenType.Keyword,
                                   values=[cls.ORDER_BY_KEY_TIMESTAMP]) \
            and all([token.is_whitespace for token in statement.tokens[1:-1]]) \
            and statement.tokens[-1].ttype == TokenType.Keyword.Order:
        token_value = cls.ORDER_BY_KEY_TIMESTAMP + ' ' + statement.tokens[-1].value
    else:
        raise MlflowException(f"Invalid order_by clause '{order_by}'. Could not be parsed.",
                              error_code=INVALID_PARAMETER_VALUE)
    return token_value
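
The separate keyword branches exist because sqlparse lexes the bare word timestamp as a Keyword rather than an Identifier, while a qualified key such as metrics.rmse (with optional ordering) should come back as a single Identifier. A hedged check:

import sqlparse

print(sqlparse.parse('timestamp')[0].tokens[0].ttype)  # expected: Token.Keyword
stmt = sqlparse.parse('metrics.rmse DESC')[0]
print(len(stmt.tokens), type(stmt.tokens[0]).__name__)  # expected: 1 Identifier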
Example 15

def parse_partial_identifier(word):
    """Attempt to parse a (partially typed) word as an identifier

    word may include a schema qualification, like `schema_name.partial_name`
    or `schema_name.`. There may also be unclosed quotation marks, like
    `"schema`, or `schema."partial_name`

    :param word: string representing a (partially complete) identifier
    :return: sqlparse.sql.Identifier, or None
    """
    p = sqlparse.parse(word)[0]
    n_tok = len(p.tokens)
    if n_tok == 1 and isinstance(p.tokens[0], Identifier):
        return p.tokens[0]
    if p.token_next_by(m=(Error, '"'))[1]:
        # An unmatched double quote, e.g. '"foo', 'foo."', or 'foo."bar'
        # Close the double quote, then reparse
        return parse_partial_identifier(word + '"')
    return None
Example 16

def _identifiers(tok):
    if isinstance(tok, IdentifierList):
        for t in tok.get_identifiers():
            # NB: IdentifierList.get_identifiers() can return non-identifiers!
            if isinstance(t, Identifier):
                yield t
    elif isinstance(tok, Identifier):
        yield tok
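
The NB comment is easy to reproduce: NULL inside a select list is kept in the IdentifierList, so get_identifiers() yields a plain Keyword token alongside the identifiers. A sketch:

import sqlparse

stmt = sqlparse.parse('select a, null, b from t')[0]
for t in stmt.tokens[2].get_identifiers():
    print(type(t).__name__, t.ttype)
# expected: Identifier None / Token Token.Keyword / Identifier None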
Example 17

def extract_table_identifiers(token_stream):
    """yields tuples of (schema_name, table_name, table_alias)"""
    for item in token_stream:
        if isinstance(item, IdentifierList):
            for identifier in item.get_identifiers():
                # Sometimes Keywords (such as FROM) are classified as
                # identifiers which don't have the get_real_name() method.
                try:
                    schema_name = identifier.get_parent_name()
                    real_name = identifier.get_real_name()
                except AttributeError:
                    continue
                if real_name:
                    yield (schema_name, real_name, identifier.get_alias())
        elif isinstance(item, Identifier):
            real_name = item.get_real_name()
            schema_name = item.get_parent_name()
            if real_name:
                yield (schema_name, real_name, item.get_alias())
            else:
                name = item.get_name()
                yield (None, name, item.get_alias() or name)
        elif isinstance(item, Function):
            yield (None, item.get_name(), item.get_name())

# extract_tables is inspired from examples in the sqlparse lib.
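
This generator is normally fed the tokens that follow FROM. A self-contained sketch, where extract_from_part is a simplified stand-in for the companion helper the snippet assumes:

import sqlparse
from sqlparse.sql import Function, Identifier, IdentifierList
from sqlparse.tokens import Keyword

def extract_from_part(parsed):
    from_seen = False
    for item in parsed.tokens:
        if from_seen:
            if item.ttype is Keyword:  # next clause (WHERE, GROUP BY, ...)
                return
            yield item
        elif item.ttype is Keyword and item.value.upper() == 'FROM':
            from_seen = True

parsed = sqlparse.parse('select * from sch.foo f, bar')[0]
print(list(extract_table_identifiers(extract_from_part(parsed))))
# expected: [('sch', 'foo', 'f'), (None, 'bar', None)]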
Example 18

def group_typecasts(tlist):
    def match(token):
        return token.match(T.Punctuation, '::')

    def valid(token):
        return token is not None

    def post(tlist, pidx, tidx, nidx):
        return pidx, nidx

    valid_prev = valid_next = valid
    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
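
After this pass the ::type suffix lives inside the Identifier and should be reachable via get_typecast():

import sqlparse

stmt = sqlparse.parse('select foo::integer from t')[0]
print(stmt.tokens[2].get_typecast())  # expected: integer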
Example 19

def group_as(tlist):
    def match(token):
        return token.is_keyword and token.normalized == 'AS'

    def valid_prev(token):
        return token.normalized == 'NULL' or not token.is_keyword

    def valid_next(token):
        ttypes = T.DML, T.DDL
        return not imt(token, t=ttypes) and token is not None

    def post(tlist, pidx, tidx, nidx):
        return pidx, nidx

    _group(tlist, sql.Identifier, match, valid_prev, valid_next, post)
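
The resulting Identifier keeps both the real name and the alias:

import sqlparse

ident = sqlparse.parse('select a as b from t')[0].tokens[2]
print(ident.get_real_name(), ident.get_alias())  # expected: a b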
Example 20

def group_aliased(tlist):
    I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
               sql.Operation)

    tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
    while token:
        nidx, next_ = tlist.token_next(tidx)
        if isinstance(next_, sql.Identifier):
            tlist.group_tokens(sql.Identifier, tidx, nidx, extend=True)
        tidx, token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=tidx)
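
This pass handles aliases written without AS; a bare 'foo bar' should still report bar as the alias. A sketch:

import sqlparse

ident = sqlparse.parse('select foo bar from t')[0].tokens[2]
print(ident.get_real_name(), ident.get_alias())  # expected: foo bar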
Example 21

def group_order(tlist):
    """Group together Identifier and Asc/Desc token"""
    tidx, token = tlist.token_next_by(t=T.Keyword.Order)
    while token:
        pidx, prev_ = tlist.token_prev(tidx)
        if imt(prev_, i=sql.Identifier, t=T.Number):
            tlist.group_tokens(sql.Identifier, pidx, tidx)
            tidx = pidx
        tidx, token = tlist.token_next_by(t=T.Keyword.Order, idx=tidx)
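
Once grouped, the sort direction should be reachable via get_ordering() on the Identifier:

import sqlparse

stmt = sqlparse.parse('select * from t order by x desc')[0]
print(stmt.tokens[-1].get_ordering())  # expected: DESC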
Example 22

def extract_table_identifiers(token_stream):
    for item in token_stream:
        if isinstance(item, IdentifierList):
            for identifier in item.get_identifiers():
                yield identifier.get_name()
        elif isinstance(item, Identifier):
            yield item.get_name()
        # It's a bug to check for Keyword here, but in the example
        # above some table names are identified as keywords...
        elif item.ttype is Keyword:
            yield item.value
Example 23

def group_as(tlist):
    def _right_valid(token):
        # Currently limited to DML/DDL. Maybe more non-SQL reserved
        # keywords should appear here (see issue8).
        return token.ttype not in (T.DML, T.DDL)

    def _left_valid(token):
        if token.ttype is T.Keyword and token.value in ('NULL',):
            return True
        return token.ttype is not T.Keyword

    _group_left_right(tlist, T.Keyword, 'AS', sql.Identifier,
                      check_right=_right_valid,
                      check_left=_left_valid)
Example 24

def group_comparison(tlist):
    def _parts_valid(token):
        return (token.ttype in (T.String.Symbol, T.String.Single,
                                T.Name, T.Number, T.Number.Float,
                                T.Number.Integer, T.Literal,
                                T.Literal.Number.Integer, T.Name.Placeholder)
                or isinstance(token, (sql.Identifier, sql.Parenthesis))
                or (token.ttype is T.Keyword
                    and token.value.upper() in ['NULL', ]))

    _group_left_right(tlist, T.Operator.Comparison, None, sql.Comparison,
                      check_left=_parts_valid, check_right=_parts_valid)
Example 25

def group_order(tlist):
    idx = 0
    token = tlist.token_next_by_type(idx, T.Keyword.Order)
    while token:
        prev = tlist.token_prev(token)
        if isinstance(prev, sql.Identifier):
            ido = tlist.group_tokens(sql.Identifier,
                                     tlist.tokens_between(prev, token))
            idx = tlist.token_index(ido) + 1
        else:
            idx = tlist.token_index(token) + 1
        token = tlist.token_next_by_type(idx, T.Keyword.Order)
Example 26

def test_issue39(self):
    p = sqlparse.parse('select user.id from user')[0]
    self.assertEqual(len(p.tokens), 7)
    idt = p.tokens[2]
    self.assertEqual(idt.__class__, sql.Identifier)
    self.assertEqual(len(idt.tokens), 3)
    self.assertEqual(idt.tokens[0].match(T.Name, 'user'), True)
    self.assertEqual(idt.tokens[1].match(T.Punctuation, '.'), True)
    self.assertEqual(idt.tokens[2].match(T.Name, 'id'), True)
Example 27

def test_issue40(self):
    # make sure identifier lists in subselects are grouped
    p = sqlparse.parse(('SELECT id, name FROM '
                        '(SELECT id, name FROM bar) as foo'))[0]
    self.assertEqual(len(p.tokens), 7)
    self.assertEqual(p.tokens[2].__class__, sql.IdentifierList)
    self.assertEqual(p.tokens[-1].__class__, sql.Identifier)
    self.assertEqual(p.tokens[-1].get_name(), u'foo')
    sp = p.tokens[-1].tokens[0]
    self.assertEqual(sp.tokens[3].__class__, sql.IdentifierList)
    # make sure that formatting works as expected
    self.ndiffAssertEqual(
        sqlparse.format(('SELECT id, name FROM '
                         '(SELECT id, name FROM bar)'),
                        reindent=True),
        ('SELECT id,\n'
         '       name\n'
         'FROM\n'
         '  (SELECT id,\n'
         '          name\n'
         '   FROM bar)'))
    self.ndiffAssertEqual(
        sqlparse.format(('SELECT id, name FROM '
                         '(SELECT id, name FROM bar) as foo'),
                        reindent=True),
        ('SELECT id,\n'
         '       name\n'
         'FROM\n'
         '  (SELECT id,\n'
         '          name\n'
         '   FROM bar) as foo'))
Example 28

def test_issue78():
    # the bug author provided these nice examples, let's use them!
    def _get_identifier(sql):
        p = sqlparse.parse(sql)[0]
        return p.tokens[2]

    results = (('get_name', 'z'),
               ('get_real_name', 'y'),
               ('get_parent_name', 'x'),
               ('get_alias', 'z'),
               ('get_typecast', 'text'))
    variants = (
        'select x.y::text as z from foo',
        'select x.y::text as "z" from foo',
        'select x."y"::text as z from foo',
        'select x."y"::text as "z" from foo',
        'select "x".y::text as z from foo',
        'select "x".y::text as "z" from foo',
        'select "x"."y"::text as z from foo',
        'select "x"."y"::text as "z" from foo',
    )
    for variant in variants:
        i = _get_identifier(variant)
        assert isinstance(i, sql.Identifier)
        for func_name, result in results:
            func = getattr(i, func_name)
            assert func() == result
Example 29

def test_parenthesis(self):
    s = 'select (select (x3) x2) and (y2) bar'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, str(parsed))
    self.assertEqual(len(parsed.tokens), 7)
    self.assert_(isinstance(parsed.tokens[2], sql.Parenthesis))
    self.assert_(isinstance(parsed.tokens[-1], sql.Identifier))
    self.assertEqual(len(parsed.tokens[2].tokens), 5)
    self.assert_(isinstance(parsed.tokens[2].tokens[3], sql.Identifier))
    self.assert_(isinstance(parsed.tokens[2].tokens[3].tokens[0], sql.Parenthesis))
    self.assertEqual(len(parsed.tokens[2].tokens[3].tokens), 3)
Example 30

def test_identifier_wildcard(self):
    p = sqlparse.parse('a.*, b.id')[0]
    self.assert_(isinstance(p.tokens[0], sql.IdentifierList))
    self.assert_(isinstance(p.tokens[0].tokens[0], sql.Identifier))
    self.assert_(isinstance(p.tokens[0].tokens[-1], sql.Identifier))