Complete Yocto mirror with license table for TQMa6UL (2038-compliance)
- 264 license table entries with exact download URLs (224/264 resolved)
- Complete sources/ directory with all BitBake recipes
- Build configuration: tqma6ul-multi-mba6ulx, spaetzle (musl)
- Full traceability for the Softwarefreigabeantrag (software release request)
- GCC 13.4.0, Linux 6.6.102, U-Boot 2023.04, musl 1.2.4
- License distribution: GPL-2.0 (24), MIT (23), GPL-2.0+ (18), BSD-3 (16)
This commit is contained in:
883
sources/poky/bitbake/lib/bb/pysh/pyshlex.py
Normal file
883
sources/poky/bitbake/lib/bb/pysh/pyshlex.py
Normal file
@@ -0,0 +1,883 @@
|
||||
# pyshlex.py - PLY compatible lexer for pysh.
|
||||
#
|
||||
# Copyright 2007 Patrick Mezard
|
||||
#
|
||||
# This software may be used and distributed according to the terms
|
||||
# of the GNU General Public License, incorporated herein by reference.
|
||||
|
||||
# TODO:
|
||||
# - review all "char in 'abc'" snippets: the empty string can be matched
|
||||
# - test line continuations within quoted/expansion strings
|
||||
# - eof is buggy wrt sublexers
|
||||
# - the lexer cannot really work in pull mode as it would be required to run
|
||||
# PLY in pull mode. It was designed to work incrementally and it would not be
|
||||
# that hard to enable pull mode.
|
||||
import re
|
||||
|
||||
from ply import lex
|
||||
from bb.pysh.sherrors import *
|
||||
|
||||
class NeedMore(Exception):
    """Raised by the lexers when the buffered input cannot be delimited yet."""
|
||||
|
||||
def is_blank(c):
    """Return True when c is exactly one space or one tab character."""
    return c == ' ' or c == '\t'
|
||||
|
||||
# One or more decimal digits spanning the whole string.
_RE_DIGITS = re.compile(r'^\d+$')


def are_digits(s):
    """Return True when s is made of digits only (and is not empty)."""
    # The pattern is anchored with '^', so match() and search() agree.
    return bool(_RE_DIGITS.match(s))
|
||||
|
||||
_OPERATORS = dict([
|
||||
('&&', 'AND_IF'),
|
||||
('||', 'OR_IF'),
|
||||
(';;', 'DSEMI'),
|
||||
('<<', 'DLESS'),
|
||||
('>>', 'DGREAT'),
|
||||
('<&', 'LESSAND'),
|
||||
('>&', 'GREATAND'),
|
||||
('<>', 'LESSGREAT'),
|
||||
('<<-', 'DLESSDASH'),
|
||||
('>|', 'CLOBBER'),
|
||||
('&', 'AMP'),
|
||||
(';', 'COMMA'),
|
||||
('<', 'LESS'),
|
||||
('>', 'GREATER'),
|
||||
('(', 'LPARENS'),
|
||||
(')', 'RPARENS'),
|
||||
])
|
||||
|
||||
#Built inside a function to silence pychecker "Local variable shadows global"
def make_partial_ops():
    """Return a dict whose keys are every non-empty prefix of every operator.

    Values are always None; the dict is only used for membership tests.
    """
    return {
        op[:end]: None
        for op in _OPERATORS
        for end in range(1, len(op) + 1)
    }


_PARTIAL_OPERATORS = make_partial_ops()
|
||||
|
||||
def is_partial_op(s):
    """Tell whether s is a non-empty leading part of some operator.

    A complete operator counts as a leading part of itself.
    """
    found = s in _PARTIAL_OPERATORS
    return found
|
||||
|
||||
def is_op(s):
    """Return the operator identifier for s, or None when s is not an
    operator.
    """
    return _OPERATORS.get(s, None)
|
||||
|
||||
_RESERVEDS = dict([
|
||||
('if', 'If'),
|
||||
('then', 'Then'),
|
||||
('else', 'Else'),
|
||||
('elif', 'Elif'),
|
||||
('fi', 'Fi'),
|
||||
('do', 'Do'),
|
||||
('done', 'Done'),
|
||||
('case', 'Case'),
|
||||
('esac', 'Esac'),
|
||||
('while', 'While'),
|
||||
('until', 'Until'),
|
||||
('for', 'For'),
|
||||
('{', 'Lbrace'),
|
||||
('}', 'Rbrace'),
|
||||
('!', 'Bang'),
|
||||
('in', 'In'),
|
||||
('|', 'PIPE'),
|
||||
])
|
||||
|
||||
def get_reserved(s):
    """Return the token type of reserved word s, or None if s is not one."""
    return _RESERVEDS.get(s, None)
|
||||
|
||||
# One or more ASCII letters, digits or underscores spanning the whole string.
_RE_NAME = re.compile(r'^[0-9a-zA-Z_]+$')


def is_name(s):
    """Return True when s only contains letters, digits and underscores."""
    # The pattern is anchored with '^', so match() and search() agree.
    return bool(_RE_NAME.match(s))
|
||||
|
||||
def find_chars(seq, chars):
    """Locate the first element of seq which belongs to chars.

    Return a tuple (index, element), or (-1, None) when no element matches.
    """
    hits = ((index, item) for index, item in enumerate(seq) if item in chars)
    return next(hits, (-1, None))
|
||||
|
||||
class WordLexer:
    """Incremental parser for one quoted or expansion expression.

    The input must start with a quoting or expansion character. The result is
    an expression tree made of lists and strings: each list stands for a
    quoted or expansion expression, its first element being the opening
    separator and its last one the closing separator, with any number of
    strings or nested lists in-between. Embedded expressions are parsed
    recursively. Plain (non quoted/expansion) text can be represented either
    as strings or as lists delimited by empty strings.
    """

    # Characters allowed in a parameter name, as a char -> char mapping.
    NAME_CHARSET = {
        c: c
        for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
    }

    # Single characters accepted right after '$' as special parameters.
    SPECIAL_CHARSET = '@*#?-$!0'

    #Which characters a backslash can escape depends on the enclosing delimiter
    ESCAPABLE = {
        '`': {'$', '\\', '`'},
        '"': {'$', '\\', '`', '"'},
        "'": set(),
    }

    def __init__(self, heredoc = False):
        # Input characters not processed yet.
        self._buffer = []
        # Empty, or the chain of nested expressions currently being parsed,
        # outermost first (the DFS path to the innermost one).
        self._stack = []
        self._escapable = None
        # True when parsing the content of an unquoted here-document.
        self._heredoc = heredoc

    def add(self, data, eof=False):
        """Feed more characters to the lexer.

        Return a tuple (expr, remaining) holding the expression tree and the
        unconsumed input once the expression is delimited. Raise NeedMore
        while it is not.
        """
        self._buffer.extend(data)
        self._parse(eof)

        tree = self._stack[0]
        leftover = ''.join(self._buffer)
        self._stack, self._buffer = [], []
        return tree, leftover

    def _is_escapable(self, c, delim=None):
        """Tell whether a backslash before c acts as an escape.

        When delim is not given it is '"' inside here-documents, otherwise the
        opening separator of the expression enclosing the current one. Outside
        of any enclosing expression everything is escapable.
        """
        if delim is None:
            if self._heredoc:
                # Backslashes behave as in double quotes within unquoted
                # here-documents.
                delim = '"'
            elif len(self._stack) > 1:
                delim = self._stack[-2][0]
            else:
                return True

        allowed = self.ESCAPABLE.get(delim)
        if allowed is None:
            return True
        return c in allowed

    def _parse_squote(self, buf, result, eof):
        """Consume a single-quoted string up to its closing quote."""
        # Also covers the empty buffer case.
        if "'" not in buf:
            raise NeedMore()
        end = buf.index("'")
        result[-1] += ''.join(buf[:end])
        result.append("'")
        return end + 1, True

    def _parse_bquote(self, buf, result, eof):
        """Consume the single character following a backslash."""
        if not buf:
            raise NeedMore()

        head = buf[0]
        if head == '\n':
            #Line continuation: drop both the backslash and the newline
            result[:] = ['', '', '']
        elif self._is_escapable(head):
            result[-1] += head
            result.append('')
        else:
            #Not an escape: keep backslash and character as literal text
            result[:] = ['', '\\' + head, '']

        return 1, True

    def _parse_dquote(self, buf, result, eof):
        """Consume double-quoted text up to the next special character."""
        if not buf:
            raise NeedMore()
        pos, sep = find_chars(buf, '$\\`"')
        if pos == -1:
            raise NeedMore()

        result[-1] += ''.join(buf[:pos])
        if sep != '"':
            #Stop right before the separator, a nested parse will handle it
            return pos, False
        result.append('"')
        return pos + 1, True

    def _parse_command(self, buf, result, eof):
        """Consume `...` or $(...) text up to the next special character."""
        if not buf:
            raise NeedMore()

        opener = result[0]
        stoppers = '$\\`"\''
        if opener == '$(':
            stoppers += ')'
        pos, sep = find_chars(buf, stoppers)
        if pos == -1:
            raise NeedMore()

        result[-1] += ''.join(buf[:pos])
        if (opener, sep) in (('$(', ')'), ('`', '`')):
            result.append(sep)
            return pos + 1, True
        return pos, False

    def _parse_parameter(self, buf, result, eof):
        """Consume ${...} text up to the next special character."""
        if not buf:
            raise NeedMore()

        pos, sep = find_chars(buf, '$\\`"\'}')
        if pos == -1:
            raise NeedMore()

        result[-1] += ''.join(buf[:pos])
        if sep != '}':
            return pos, False
        result.append(sep)
        return pos + 1, True

    def _parse_dollar(self, buf, result, eof):
        """Parse what follows a '$'.

        On first entry the opening separator is a bare '$': it is either
        upgraded to '$(', '$((' or '${' or the parameter name is read at once.
        Upgraded expressions are then delegated to the matching parser.
        """
        if result[0] == '$':
            if not buf:
                #TODO: handle empty $
                raise NeedMore()

            head = buf[0]
            if head == '(':
                # One more character is required to tell '$(' from '$(('.
                if len(buf) == 1:
                    raise NeedMore()

                if buf[1] == '(':
                    result[0] = '$(('
                    del buf[:2]
                else:
                    result[0] = '$('
                    del buf[:1]

            elif head == '{':
                result[0] = '${'
                del buf[:1]
            else:
                if head in self.SPECIAL_CHARSET:
                    result[-1] = head
                    read = 1
                else:
                    # The name stops at the first non-name character.
                    read = next(
                        (i for i, c in enumerate(buf)
                         if c not in self.NAME_CHARSET),
                        None)
                    if read is None:
                        # The whole buffer is name characters, the name may
                        # go on unless this is the end of input.
                        if not eof:
                            raise NeedMore()
                        read = len(buf)

                    result[-1] += ''.join(buf[:read])

                if result[-1]:
                    result.append('')
                else:
                    # '$' without a name: turn it into literal text.
                    result[:] = ['', result[0], '']
                return read, True

        opener = result[0]
        if opener == '$(':
            return self._parse_command(buf, result, eof)
        if opener == '${':
            return self._parse_parameter(buf, result, eof)
        raise NotImplementedError(opener)

    def _parse(self, eof):
        """Run the parsers until the outermost expression is closed.

        Raise NeedMore when the buffer runs out before that.
        """
        buf, stack = self._buffer, self._stack
        handlers = {
            "'": self._parse_squote,
            '\\': self._parse_bquote,
            '"': self._parse_dquote,
            '`': self._parse_command,
        }
        descend = False

        while True:
            if descend or not stack:
                # Open a new (possibly nested) expression.
                if not buf:
                    raise NeedMore()
                if buf[0] not in '"\\`$\'':
                    raise ShellSyntaxError('Invalid quoted string sequence')
                stack.append([buf.pop(0), ''])
                descend = False

            result = stack[-1]
            opener = result[0]
            parsefunc = handlers.get(opener)
            if parsefunc is None:
                if opener[0] != '$':
                    raise NotImplementedError()
                parsefunc = self._parse_dollar

            read, closed = parsefunc(buf, result, eof)

            del buf[:read]
            if not closed:
                descend = True
            elif len(stack) > 1:
                #Merge the finished expression in its parent
                parsed = stack.pop()
                stack[-1] += [parsed, '']
            else:
                break
|
||||
|
||||
def normalize_wordtree(wtree):
    """Return a copy of wtree where literal sub-sequences (the ones delimited
    with empty strings) are merged in their parent and empty strings between
    delimiters are dropped.
    """
    def fold(node):
        inner = []
        for child in node[1:-1]:
            if not isinstance(child, list):
                #Skip empty strings
                if child:
                    inner.append(child)
                continue
            child = fold(child)
            if child[0] == '':
                #Literal sequence: splice its content at this level
                inner.extend(child[1:-1])
            else:
                inner.append(child)
        # Always keep at least one element between the delimiters.
        return [node[0]] + (inner or ['']) + [node[-1]]

    return fold(wtree)
|
||||
|
||||
|
||||
def make_wordtree(token, here_document=False):
    """Parse a delimited token and return a tree similar to the ones returned
    by WordLexer.

    token may mix quoted/expansion fields with plain text in any order. When
    here_document is true, single and double quotes are treated as plain
    text and backslashes follow the here-document escaping rules.

    Raise ShellSyntaxError if a quoted or expansion field is not terminated.
    """
    tree = ['']
    remaining = token
    delimiters = '\\$`'
    if not here_document:
        delimiters += '\'"'

    while 1:
        pos, sep = find_chars(remaining, delimiters)
        if pos==-1:
            # Only plain text is left: close the tree and fold it.
            tree += [remaining, '']
            return normalize_wordtree(tree)
        tree.append(remaining[:pos])
        remaining = remaining[pos:]

        try:
            # eof is True: the token is complete, nothing more will come.
            result, remaining = WordLexer(heredoc = here_document).add(remaining, True)
        except NeedMore:
            # The message used to be raised with its '%s' placeholder left
            # as is; report the offending token.
            raise ShellSyntaxError('Invalid token "%s"' % (token,))
        tree.append(result)
|
||||
|
||||
|
||||
def wordtree_as_string(wtree):
    """Turn an expression tree built by make_wordtree back into a string by
    joining all its leaves, delimiters included, in depth-first order.
    """
    def leaves(node):
        for child in node:
            if isinstance(child, list):
                yield from leaves(child)
            else:
                yield child

    return ''.join(leaves(wtree))
|
||||
|
||||
|
||||
def unquote_wordtree(wtree):
    """Flatten the word tree into a string, dropping the delimiters of
    literal, single-quoted, double-quoted and backslash sequences. Any other
    expansion sequence is joined with its delimiters.
    """
    def flatten(node):
        if node[0] in ('', "'", '"', '\\'):
            # Quoting delimiters are dropped, their content is kept.
            node = node[1:-1]
        return ''.join(
            flatten(child) if isinstance(child, list) else child
            for child in node)

    return flatten(wtree)
|
||||
|
||||
|
||||
class HereDocLexer:
    """HereDocLexer delimits a here-document content: everything from right
    after the newline starting the here-document up to the closing delimiter
    line. The delimiter line is consumed but is not part of the content.
    """
    def __init__(self, op, delim):
        """op is the here-document operator ('<<' or '<<-') and delim the
        non-empty closing delimiter.

        Raise ShellSyntaxError if delim is empty.
        """
        assert op in ('<<', '<<-')
        if not delim:
            raise ShellSyntaxError('invalid here document delimiter %s' % str(delim))

        self._op = op
        self._delim = delim
        # Input characters not split into lines yet.
        self._buffer = []
        # Content collected so far as alternating lines and line endings.
        self._token = []

    def add(self, data, eof):
        """If the here-document was delimited, return a tuple (content, remaining).
        Raise NeedMore() otherwise.
        """
        self._buffer += list(data)
        self._parse(eof)
        token = ''.join(self._token)
        remaining = ''.join(self._buffer)
        # The leftover input now belongs to the caller: forget it, as
        # WordLexer.add() does. This used to reset a '_remaining' attribute
        # nothing else reads and to keep the buffer as it was.
        self._token, self._buffer = [], []
        return token, remaining

    def _parse(self, eof):
        """Move lines from the buffer to the content until the delimiter line
        is found, or the input is exhausted while eof is true.

        Raise NeedMore if the buffer holds no complete line and eof is false.
        """
        while 1:
            #Look for first unescaped newline. Quotes may be ignored
            escaped = False
            for i,c in enumerate(self._buffer):
                if escaped:
                    escaped = False
                elif c=='\\':
                    escaped = True
                elif c=='\n':
                    break
            else:
                # No unescaped newline in the buffer (or the buffer is empty).
                i = -1

            if i==-1 or self._buffer[i]!='\n':
                if not eof:
                    raise NeedMore()
                #No more data, maybe the last line is closing delimiter
                line = ''.join(self._buffer)
                eol = ''
                self._buffer[:] = []
            else:
                line = ''.join(self._buffer[:i])
                eol = self._buffer[i]
                self._buffer[:i+1] = []

            if self._op=='<<-':
                # '<<-' removes leading tabs, on the delimiter line too.
                line = line.lstrip('\t')

            if line==self._delim:
                break

            self._token += [line, eol]
            if i==-1:
                # End of input reached without meeting the delimiter.
                break
|
||||
|
||||
class Token:
    """Token with a type and a value, also readable as a (type, value) pair
    through indexing.
    """
    #TODO: check this is still in use
    OPERATOR = 'OPERATOR'
    WORD = 'WORD'

    def __init__(self):
        self.type = None
        self.value = ''

    def __getitem__(self, key):
        """Return the type for key 0, the value for key 1, raise IndexError
        for anything else.
        """
        if key == 0:
            return self.type
        elif key == 1:
            return self.value
        raise IndexError(key)
|
||||
|
||||
|
||||
class HereDoc:
    """State of the here-document being recognized: its operator, its name
    and the list of pending items attached to it.
    """
    def __init__(self, op, name=None):
        self.op, self.name = op, name
        # A fresh list for every instance.
        self.pendings = []
|
||||
|
||||
# Token types set by the lexer.

# Generic types, refined once a token is delimited
TK_TOKEN = 'TOKEN'
TK_OP = 'OP'
TK_COMMENT = 'COMMENT'
TK_NEWLINE = 'NEWLINE'

# Operators handled explicitly
TK_COMMA = 'COMMA'
TK_AMPERSAND = 'AMP'

# Specialized tokens
TK_IONUMBER = 'IO_NUMBER'
TK_ASSIGNMENT = 'ASSIGNMENT_WORD'
TK_HERENAME = 'HERENAME'
|
||||
|
||||
class Lexer:
    """Main lexer.

    Feed it with add(). Every delimited token is reported through on_token(),
    which subclasses have to implement.
    """
    # Here-document handling makes the whole thing more complex because they basically
    # force tokens to be reordered: here-content must come right after the operator
    # and the here-document name, while some other tokens might be following the
    # here-document expression on the same line.
    #
    # So, here-doc states are basically:
    #   *self._state==ST_NORMAL
    #       - self._heredoc.op is None: no here-document
    #       - self._heredoc.op is not None but name is: here-document operator matched,
    #           waiting for the document name/delimiter
    #       - self._heredoc.op and name are not None: here-document is ready, following
    #           tokens are being stored and will be pushed again when the document is
    #           completely parsed.
    #   *self._state==ST_HEREDOC
    #       - The here-document is being delimited by self._herelexer. Once it is done
    #           the content is pushed in front of the pending token list then all these
    #           tokens are pushed once again.
    ST_NORMAL       = 'ST_NORMAL'
    ST_OP           = 'ST_OP'
    ST_BACKSLASH    = 'ST_BACKSLASH'
    ST_QUOTED       = 'ST_QUOTED'
    ST_COMMENT      = 'ST_COMMENT'
    ST_HEREDOC      = 'ST_HEREDOC'

    #Match end of backquote strings
    RE_BACKQUOTE_END = re.compile(r'(?<!\\)(`)')

    def __init__(self, parent_state = None):
        # Input characters and index of the next one to process.
        self._input = []
        self._pos = 0

        # Token being built and its current type.
        self._token = ''
        self._type = TK_TOKEN

        self._state = self.ST_NORMAL
        self._parent_state = parent_state
        # Sublexer delimiting the current quoted/expansion expression, if any.
        self._wordlexer = None

        self._heredoc = HereDoc(None)
        # Sublexer delimiting the current here-document content, if any.
        self._herelexer = None

        ### Following attributes are not used for delimiting token and can safely
        ### be changed after here-document detection (see _push_token)

        # Count the number of tokens following a 'For' reserved word. Needed to
        # return an 'In' reserved word if it comes in third place.
        self._for_count = None

    def add(self, data, eof=False):
        """Feed the lexer with data.

        When eof is set to True, returns unconsumed data or raise if the lexer
        is in the middle of a delimiting operation.
        Raise NeedMore otherwise.
        """
        self._input += list(data)
        self._parse(eof)
        # Drop what was consumed and rewind accordingly: the position used
        # to be left pointing past the trimmed input.
        self._input[:self._pos] = []
        self._pos = 0
        return ''.join(self._input)

    def _parse(self, eof):
        """Run the state machine until the input is exhausted.

        Raise NeedMore when it is and eof is false, ShellSyntaxError if the
        input ends while a here-document is still expected.
        """
        while self._state:
            if self._pos>=len(self._input):
                if not eof:
                    raise NeedMore()
                elif self._state not in (self.ST_OP, self.ST_QUOTED, self.ST_HEREDOC):
                    #Delimit the current token and leave cleanly
                    self._push_token('')
                    break
                else:
                    #Let the sublexer handle the eof themselves
                    pass

            if self._state==self.ST_NORMAL:
                self._parse_normal()
            elif self._state==self.ST_COMMENT:
                self._parse_comment()
            elif self._state==self.ST_OP:
                self._parse_op(eof)
            elif self._state==self.ST_QUOTED:
                self._parse_quoted(eof)
            elif self._state==self.ST_HEREDOC:
                self._parse_heredoc(eof)
            else:
                assert False, "Unknown state " + str(self._state)

        if self._heredoc.op is not None:
            raise ShellSyntaxError('missing here-document delimiter')

    def _parse_normal(self):
        """Process one character outside of any operator, quote or comment."""
        c = self._input[self._pos]
        if c=='\n':
            # Delimit the current token then emit the newline itself.
            self._push_token(c)
            self._token = c
            self._type = TK_NEWLINE
            self._push_token('')
            self._pos += 1
        elif c in ('\\', '\'', '"', '`', '$'):
            # The character is left in the input for the word sublexer.
            self._state = self.ST_QUOTED
        elif is_partial_op(c):
            self._push_token(c)

            self._type = TK_OP
            self._token += c
            self._pos += 1
            self._state = self.ST_OP
        elif is_blank(c):
            self._push_token(c)

            #Discard blanks
            self._pos += 1
        elif self._token:
            self._token += c
            self._pos += 1
        elif c=='#':
            # '#' only starts a comment at the beginning of a token.
            self._state = self.ST_COMMENT
            self._type = TK_COMMENT
            self._pos += 1
        else:
            self._pos += 1
            self._token += c

    def _parse_op(self, eof):
        """Extend the current operator as long as the result is still the
        beginning of an operator, then delimit it.
        """
        assert self._token

        while 1:
            if self._pos>=len(self._input):
                if not eof:
                    raise NeedMore()
                c = ''
            else:
                c = self._input[self._pos]

            op = self._token + c
            if c and is_partial_op(op):
                #Still parsing an operator
                self._token = op
                self._pos += 1
            else:
                #End of operator
                self._push_token(c)
                self._state = self.ST_NORMAL
                break

    def _parse_comment(self):
        """Accumulate comment characters up to the end of the line or of the
        available input.
        """
        while 1:
            if self._pos>=len(self._input):
                # Out of input: stay in comment state and let _parse()
                # decide. It raises NeedMore unless eof is set, in which
                # case the comment is dropped. NeedMore used to be raised
                # here whatever eof was, so a script ending with a comment
                # and no newline could not be lexed.
                return

            c = self._input[self._pos]
            if c=='\n':
                #End of comment, do not consume the end of line
                self._state = self.ST_NORMAL
                break
            else:
                self._token += c
                self._pos += 1

    def _parse_quoted(self, eof):
        """Precondition: the starting backquote/dollar is still in the input queue."""
        if self._wordlexer is None:
            self._wordlexer = WordLexer()

        #Transfer input queue character into the subparser. The slice is
        #empty when a previous call already transferred everything, which
        #used to leave the variable unbound.
        pending = self._input[self._pos:]
        self._pos += len(pending)

        # Raises NeedMore (and keeps the sublexer) while not delimited.
        wtree, remaining = self._wordlexer.add(pending, eof)
        self._wordlexer = None
        self._token += wordtree_as_string(wtree)

        #Put unparsed character back in the input queue
        if remaining:
            self._input[self._pos:self._pos] = list(remaining)
        self._state = self.ST_NORMAL

    def _parse_heredoc(self, eof):
        """Delimit the here-document content, then push it followed by all
        the tokens which were put aside since the here-document name.
        """
        assert not self._token

        if self._herelexer is None:
            self._herelexer = HereDocLexer(self._heredoc.op, self._heredoc.name)

        #Transfer input queue character into the subparser. The slice is
        #empty when a previous call already transferred everything, which
        #used to leave the variable unbound.
        pending = self._input[self._pos:]
        self._pos += len(pending)

        # Raises NeedMore (and keeps the sublexer) while not delimited.
        self._token, remaining = self._herelexer.add(pending, eof)

        #Reset here-document state
        self._herelexer = None
        heredoc, self._heredoc = self._heredoc, HereDoc(None)
        if remaining:
            self._input[self._pos:self._pos] = list(remaining)
        self._state = self.ST_NORMAL

        #Push pending tokens
        heredoc.pendings[:0] = [(self._token, self._type, heredoc.name)]
        for token, toktype, delim in heredoc.pendings:
            self._token = token
            self._type = toktype
            self._push_token(delim)

    def _push_token(self, delim):
        """Finalize the current token, if any, and hand it to on_token().

        delim is the character ending the token, or an empty string. Return 1
        if a token was pushed or put aside for a here-document, 0 if there
        was no token or it was a comment.
        """
        if not self._token:
            return 0

        if self._heredoc.op is not None:
            if self._heredoc.name is None:
                #Here-document name
                if self._type!=TK_TOKEN:
                    raise ShellSyntaxError("expecting here-document name, got '%s'" % self._token)
                self._heredoc.name = unquote_wordtree(make_wordtree(self._token))
                self._type = TK_HERENAME
            else:
                #Capture all tokens until the newline starting the here-document
                if self._type==TK_NEWLINE:
                    assert self._state==self.ST_NORMAL
                    self._state = self.ST_HEREDOC

                self._heredoc.pendings.append((self._token, self._type, delim))
                self._token = ''
                self._type = TK_TOKEN
                return 1

        # BEWARE: do not change parser state from here to the end of the function:
        # when parsing between an here-document operator to the end of the line
        # tokens are stored in self._heredoc.pendings. Therefore, they will not
        # reach the section below.

        #Check operators
        if self._type==TK_OP:
            #False positive because of partial op matching
            op = is_op(self._token)
            if not op:
                self._type = TK_TOKEN
            else:
                #Map to the specific operator
                self._type = op
                if self._token in ('<<', '<<-'):
                    #Done here rather than in _parse_op because there is no need
                    #to change the parser state since we are still waiting for
                    #the here-document name
                    if self._heredoc.op is not None:
                        raise ShellSyntaxError("syntax error near token '%s'" % self._token)
                    assert self._heredoc.op is None
                    self._heredoc.op = self._token

        if self._type==TK_TOKEN:
            if '=' in self._token and not delim:
                if self._token.startswith('='):
                    #Token is a WORD... a TOKEN that is.
                    pass
                else:
                    prev = self._token[:self._token.find('=')]
                    if is_name(prev):
                        self._type = TK_ASSIGNMENT
                    else:
                        #Just a token (unspecified)
                        pass
            else:
                reserved = get_reserved(self._token)
                if reserved is not None:
                    if reserved=='In' and self._for_count!=2:
                        #Sorry, not a reserved word after all
                        pass
                    else:
                        self._type = reserved
                        if reserved in ('For', 'Case'):
                            self._for_count = 0
                elif are_digits(self._token) and delim in ('<', '>'):
                    #Detect IO_NUMBER
                    self._type = TK_IONUMBER
        elif self._token==';':
            self._type = TK_COMMA
        elif self._token=='&':
            self._type = TK_AMPERSAND
        elif self._type==TK_COMMENT:
            #Comments are not part of sh grammar, ignore them
            self._token = ''
            self._type = TK_TOKEN
            return 0

        if self._for_count is not None:
            #Track token count in 'For' expression to detect 'In' reserved words.
            #Can only be in third position, no need to go beyond
            self._for_count += 1
            if self._for_count==3:
                self._for_count = None

        self.on_token((self._token, self._type))
        self._token = ''
        self._type = TK_TOKEN
        return 1

    def on_token(self, token):
        """Receive a delimited token as a (value, type) tuple. To be
        implemented by subclasses.
        """
        raise NotImplementedError
|
||||
|
||||
|
||||
# Names of all the token types the lexer can emit.
tokens = [
    TK_TOKEN,
    # To silence yacc unused token warnings
    # TK_COMMENT,
    TK_NEWLINE,
    TK_IONUMBER,
    TK_ASSIGNMENT,
    TK_HERENAME,
]

#Add specific operators
tokens.extend(_OPERATORS.values())
#Add reserved words
tokens.extend(_RESERVEDS.values())
|
||||
|
||||
class PLYLexer(Lexer):
    """Lexer storing its tokens as PLY LexToken objects and serving them
    through the token() method PLY expects.
    """
    def __init__(self):
        Lexer.__init__(self)
        # Tokens produced so far and index of the next one to serve.
        self._tokens = []
        self._current = 0
        self.lineno = 0

    def on_token(self, token):
        """Wrap the (value, type) pair in a LexToken and store it."""
        value, toktype = token

        # Positions are not tracked, everything is reported at 0.
        self.lineno = 0
        lextoken = lex.LexToken()
        lextoken.value = value
        lextoken.type = toktype
        lextoken.lexer = self
        lextoken.lexpos = 0
        lextoken.lineno = 0

        self._tokens.append(lextoken)

    def is_empty(self):
        """Return True if no token was produced so far."""
        return not self._tokens

    #PLY compliant interface
    def token(self):
        """Return the next stored token, or None when they were all served."""
        index = self._current
        if index >= len(self._tokens):
            return None
        self._current = index + 1
        return self._tokens[index]
|
||||
|
||||
|
||||
def get_tokens(s):
    """Lex the whole string s.

    Return a tuple (tokens, unprocessed): tokens is the list of
    (value, type) pairs found and unprocessed the part of s the lexer
    did not consume.
    """
    lexer = PLYLexer()
    untouched = lexer.add(s, True)
    # token() returns None once every stored token has been served.
    pairs = [(t.value, t.type) for t in iter(lexer.token, None)]
    return pairs, untouched
|
||||
Reference in New Issue
Block a user