#
# This file is part of pySMT.
#
# Copyright 2014 Andrea Micheli and Marco Gario
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
from collections import namedtuple
import pysmt.typing as types
from pysmt.environment import get_env
from pysmt.exceptions import PysmtSyntaxError, UndefinedSymbolError
from pysmt.constants import Fraction
def HRParser(env=None):
"""Parser for HR format of pySMT."""
return PrattParser(HRLexer, env=env)
def parse(string):
"""Parse an hr-string."""
return HRParser().parse(string)
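# Usage sketch (not part of the module): identifiers are resolved against the
# formula manager of the current environment, so symbols must be declared
# before parsing. The names 'x' and 'y' below are illustrative assumptions.
#
#   from pysmt.shortcuts import Symbol
#   from pysmt.typing import BOOL
#   x, y = Symbol("x", BOOL), Symbol("y", BOOL)
#   f = parse("(x & y) -> !y")   # i.e. Implies(And(x, y), Not(y))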
# Rules for the Lexer
Rule = namedtuple('Rule', ['regex', 'symbol', 'is_functional'])
class Lexer(object):
"""This class produces a stream of token objects for the Pratt-parser.
    The rules for the tokens are stored in self.rules. Call self.compile()
    after modifying the rules.
"""
def __init__(self, env=None):
if env is None:
env = get_env()
self.env = env
self.mgr = env.formula_manager
self.get_type = env.stc.get_type
self.rules = []
self.scanner = None
self.eoi = EndOfInput()
def compile(self):
self.scanner = re.compile("|".join(rule.regex for rule in self.rules),
re.DOTALL | re.VERBOSE)
def lexing_error(self, read):
raise PysmtSyntaxError("Unexpected input: %s" % read)
def tokenize(self, data):
"""The token generator for the given string"""
for match in re.finditer(self.scanner, data):
for idx, capture in enumerate(match.groups()):
if capture is not None:
rule = self.rules[idx]
symbol = rule.symbol
if rule.symbol is not None:
if rule.is_functional:
yield symbol(capture)
else:
yield symbol
break
yield self.eoi
# EOC Lexer
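# Illustrative sketch of the token stream (assuming the HRLexer defined below):
# tokenize() yields one GrammarSymbol per matched rule, skips rules whose
# symbol is None (e.g. whitespace), and terminates with the EndOfInput marker.
#
#   tokens = list(HRLexer().tokenize("1 + 2"))
#   # -> [Constant, InfixOpAdapter, Constant, EndOfInput]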
class GrammarSymbol(object):
"""Base class for all the parsing tokens"""
def __init__(self, lbp=0, value=None, payload=None):
self.lbp = lbp
self.value = value
self.payload = payload
def nud(self, parser):
raise PysmtSyntaxError("Syntax error at token '%s'." % parser.token)
def led(self, parser, left):
raise PysmtSyntaxError("Syntax error at token '%s' (Read: '%s')." % \
(parser.token, left))
#
# Precedence table (from low priority to high priority):
#
# 10 : Xor, Implies, Iff
# 20 : Forall, Exists
# 30 : Or
# 40 : And
# 50 : Not
# 60 : LE, LT, Equals, GE, GT, BVULE, BVUGE, BVULT, BVUGT, BVSLE, BVSGE, BVSGT
# 70 : Plus, Minus, BVAdd, BVSub
# 80 : Times, Div, BVMul, BVUDiv, BVSDiv, BVSRem, BVURem
# 90 : BVLShl, BVLShr, BVAShr, BVConcat, BVXor, BVRor, BVRol, BVZExt, BVSExt, BVComp
# 100 : ToReal, Uminus, BVNeg, BVToNat
# 200 : ()
# 300 : []
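# For example, since Times (80) binds tighter than Plus (70), and Not (50)
# binds tighter than And (40), the strings below parse as the parenthesized
# forms on the right (sketch, assuming the symbols are declared):
#
#   "x + y * z"   parses as   x + (y * z)
#   "!x & y"      parses as   (!x) & y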
class HRLexer(Lexer):
"""Produces a stream of token objects for the Human-Readable format."""
def __init__(self, env=None):
Lexer.__init__(self, env=env)
hr_rules = [
Rule(r"(\s+)", None, False), # whitespace
Rule(r"(-?\d+/\d+)", self.real_constant, True), # fractions
Rule(r"(-?\d+\.\d+)", self.real_constant, True),# decimals
Rule(r"(-?\d+_\d+)", self.bv_constant, True),# bv
Rule(r"(-?\d+)", self.int_constant, True),# integer literals
Rule(r"\"(.*?)\"", self.string_constant, True), # String Constant
Rule(r"BV\{(\d+)\}", self.bv_type, True),# BV Type
Rule(r"(Array\{)", OpenArrayTypeTok(), False),# Array Type
Rule(r"(Int)", IntTypeTok(), False),# Int Type
Rule(r"(Real)", RealTypeTok(), False),# Real Type
Rule(r"(Bool)", BoolTypeTok(), False),# Bool Type
Rule(r"(&)", InfixOpAdapter(self.AndOrBVAnd, 40), False),# conjunction
Rule(r"(\|)", InfixOpAdapter(self.OrOrBVOr, 30), False),# disjunction
Rule(r"(!)", UnaryOpAdapter(self.NotOrBVNot, 50), False),# negation
Rule(r"(\()", OpenPar(), False),# open parenthesis
Rule(r"(\))", ClosePar(), False),# closed parenthesis
Rule(r"(\[)", OpenBrak(), False),# open parenthesis
Rule(r"(\])", CloseBrak(), False),# closed parenthesis
Rule(r"(\})", CloseBrace(), False),# closed parenthesis
Rule(r"(<<)", InfixOpAdapter(self.mgr.BVLShl, 90), False),# Shl
Rule(r"(>>)", InfixOpAdapter(self.mgr.BVLShr, 90), False),# Shr
Rule(r"(a>>)", InfixOpAdapter(self.mgr.BVAShr, 90), False),# AShr
Rule(r"(<->)", InfixOpAdapter(self.mgr.Iff, 10), False),# iff
Rule(r"(->)", InfixOpAdapter(self.mgr.Implies, 10), False),# implies
Rule(r"(u<=)", InfixOpAdapter(self.mgr.BVULE, 60), False),# bvule
Rule(r"(u>=)", InfixOpAdapter(self.mgr.BVUGE, 60), False),# bvuge
Rule(r"(u<)", InfixOpAdapter(self.mgr.BVULT, 60), False),# bvult
Rule(r"(u>)", InfixOpAdapter(self.mgr.BVUGT, 60), False),# bvugt
Rule(r"(s<=)", InfixOpAdapter(self.mgr.BVSLE, 60), False),# bvsle
Rule(r"(s>=)", InfixOpAdapter(self.mgr.BVSGE, 60), False),# bvsge
Rule(r"(s<)", InfixOpAdapter(self.mgr.BVSLT, 60), False),# bvslt
Rule(r"(s>)", InfixOpAdapter(self.mgr.BVSGT, 60), False),# bvsgt
Rule(r"(>=)", InfixOpAdapter(self.mgr.GE, 60), False),# ge
Rule(r"(<=)", InfixOpAdapter(self.mgr.LE, 60), False),# le
Rule(r"(>)", InfixOpAdapter(self.mgr.GT, 60), False),# gt
Rule(r"(<)", InfixOpAdapter(self.mgr.LT, 60), False),# lt
Rule(r"(=)", InfixOpAdapter(self.mgr.Equals, 60), False),# eq
Rule(r"(\+)", InfixOpAdapter(self.PlusOrBVAdd, 70), False),# plus
Rule(r"(-)", InfixOrUnaryOpAdapter(self.MinusOrBVSub, self.UMinusOrBvNeg, 70, 100), False),# minus
Rule(r"(\*)", InfixOpAdapter(self.TimesOrBVMul, 80), False),# times
Rule(r"(\^)", InfixOpAdapter(self.mgr.Pow, 80), False),# pow
Rule(r"(u/)", InfixOpAdapter(self.mgr.BVUDiv, 80), False),# udiv
Rule(r"(s/)", InfixOpAdapter(self.mgr.BVSDiv, 80), False),# sdiv
Rule(r"(/)", InfixOpAdapter(self.mgr.Div, 80), False),# div
Rule(r"(s%)", InfixOpAdapter(self.mgr.BVSRem, 80), False),# srem
Rule(r"(u%)", InfixOpAdapter(self.mgr.BVURem, 80), False),# urem
Rule(r"(\?)", ExprIf(), False), # question
Rule(r"(:=)", ArrStore(), False),# ArrStore
Rule(r"(::)", InfixOpAdapter(self.mgr.BVConcat, 90), False),# BVXor
Rule(r"(:)", ExprElse(), False),# colon
Rule(r"(False)", Constant(self.mgr.FALSE()), False), # False
Rule(r"(True)", Constant(self.mgr.TRUE()), False),# True
Rule(r"(,)", ExprComma(), False),# comma
Rule(r"(\.)", ExprDot(), False),# dot
Rule(r"(xor)", InfixOpAdapter(self.mgr.BVXor, 10), False),# BVXor
Rule(r"(ROR)", InfixOpAdapter(self.BVHack(self.mgr.BVRor), 90), False),# BVRor
Rule(r"(ROL)", InfixOpAdapter(self.BVHack(self.mgr.BVRol), 90), False),# BVRol
Rule(r"(ZEXT)", InfixOpAdapter(self.BVHack(self.mgr.BVZExt), 90), False),# BVZext
Rule(r"(SEXT)", InfixOpAdapter(self.BVHack(self.mgr.BVSExt), 90), False),# BVSext
Rule(r"(bvcomp)", InfixOpAdapter(self.mgr.BVComp, 90), False),#
Rule(r"(forall)", Quantifier(self.mgr.ForAll, 20), False),#
Rule(r"(exists)", Quantifier(self.mgr.Exists, 20), False),#
Rule(r"(ToReal)", UnaryOpAdapter(self.mgr.ToReal, 100), False),#
Rule(r"(str\.len)", FunctionCallAdapter(self.mgr.StrLength, 100), False), # str_length
Rule(r"(str\.\+\+)", FunctionCallAdapter(self.mgr.StrConcat, 100), False), # str_concat
Rule(r"(str\.at)", FunctionCallAdapter(self.mgr.StrCharAt, 100), False), # str_charat
Rule(r"(str\.contains)", FunctionCallAdapter(self.mgr.StrContains, 100), False), # str_contains
Rule(r"(str\.indexof)", FunctionCallAdapter(self.mgr.StrIndexOf, 100), False), # str_indexof
Rule(r"(str\.replace)", FunctionCallAdapter(self.mgr.StrReplace, 100), False), # str_replace
Rule(r"(str\.substr)", FunctionCallAdapter(self.mgr.StrSubstr, 100), False), # str_substr
Rule(r"(str\.prefixof)", FunctionCallAdapter(self.mgr.StrPrefixOf, 100), False), # str_prefixof
Rule(r"(str\.suffixof)", FunctionCallAdapter(self.mgr.StrSuffixOf, 100), False), # str_suffixof
Rule(r"(str\.to\.int)", FunctionCallAdapter(self.mgr.StrToInt, 100), False), # str_to_int
Rule(r"(int\.to\.str)", FunctionCallAdapter(self.mgr.IntToStr, 100), False), # int_to_str
Rule(r"(bv2nat)", UnaryOpAdapter(self.mgr.BVToNatural, 100), False),#
Rule(r"'(.*?)'", self.identifier, True), # quoted identifiers
Rule(r"([A-Za-z_][A-Za-z0-9_]*)", self.identifier, True),# identifiers
Rule(r"(.)", self.lexing_error, True), # input error
]
self.rules += hr_rules
self.compile()
def bv_type(self, read):
return BVTypeTok(int(read))
def real_constant(self, read):
return Constant(self.mgr.Real(Fraction(read)))
def bv_constant(self, read):
v, w = read.split("_")
return Constant(self.mgr.BV(int(v), width=int(w)))
def int_constant(self, read):
return Constant(self.mgr.Int(int(read)))
def string_constant(self, read):
return Constant(self.mgr.String(read))
def identifier(self, read):
return Identifier(read, env=self.env)
def UMinusOrBvNeg(self, x):
ty = self.get_type(x)
if ty.is_int_type():
return self.mgr.Times(self.mgr.Int(-1), x)
elif ty.is_real_type():
return self.mgr.Times(self.mgr.Real(-1), x)
else:
return self.mgr.BVNeg(x)
def AndOrBVAnd(self, l, r):
if self.get_type(l).is_bool_type():
return self.mgr.And(l, r)
else:
return self.mgr.BVAnd(l, r)
def OrOrBVOr(self, l, r):
if self.get_type(l).is_bool_type():
return self.mgr.Or(l, r)
else:
return self.mgr.BVOr(l, r)
def NotOrBVNot(self, x):
if self.get_type(x).is_bool_type():
return self.mgr.Not(x)
else:
return self.mgr.BVNot(x)
def PlusOrBVAdd(self, l, r):
if self.get_type(l).is_bv_type():
return self.mgr.BVAdd(l, r)
else:
return self.mgr.Plus(l, r)
def MinusOrBVSub(self, l, r):
if self.get_type(l).is_bv_type():
return self.mgr.BVSub(l, r)
else:
return self.mgr.Minus(l, r)
def TimesOrBVMul(self, l, r):
if self.get_type(l).is_bv_type():
return self.mgr.BVMul(l, r)
else:
return self.mgr.Times(l, r)
def BVHack(self, op):
def _res(a, b):
if b.is_constant():
return op(a, b.constant_value())
else:
raise PysmtSyntaxError("Constant expected, got '%s'" % b)
return _res
# EOC HRLexer
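# The adapters above disambiguate overloaded operators by the type of their
# first argument: for instance '+' builds Plus on Int/Real terms and BVAdd on
# bit-vector terms. Sketch (the symbol names are illustrative assumptions):
#
#   i = Symbol("i", INT)          # parse("i + 1")   -> Plus(i, Int(1))
#   b = Symbol("b", BVType(8))    # parse("b + 1_8") -> BVAdd(b, BV(1, 8))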
#
# Grammar tokens
#
# Placeholder tokens
class CloseBrace(GrammarSymbol):
pass
class ArrStore(GrammarSymbol):
pass
class BVTypeTok(GrammarSymbol):
def __init__(self, width):
GrammarSymbol.__init__(self)
self.width = width
def nud(self, parser):
return types.BVType(self.width)
class IntTypeTok(GrammarSymbol):
def nud(self, parser):
return types.INT
class RealTypeTok(GrammarSymbol):
def nud(self, parser):
return types.REAL
class BoolTypeTok(GrammarSymbol):
def nud(self, parser):
return types.BOOL
class Constant(GrammarSymbol):
def __init__(self, value):
GrammarSymbol.__init__(self)
self.value = value
def nud(self, parser):
return self.value
class Identifier(GrammarSymbol):
def __init__(self, name, env):
GrammarSymbol.__init__(self)
self.value = env.formula_manager.get_symbol(name)
if self.value is None:
raise UndefinedSymbolError(name)
def nud(self, parser):
return self.value
class ExprIf(GrammarSymbol):
def __init__(self):
GrammarSymbol.__init__(self)
self.lbp = 5
def led(self, parser, left):
cond_ = left
then_ = parser.expression(self.lbp)
parser.expect(ExprElse, ':')
else_ = parser.expression(self.lbp)
return parser.mgr.Ite(cond_, then_, else_)
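# Sketch: the '?' / ':' pair builds an if-then-else term, e.g. assuming a Bool
# symbol c and Int symbols x, y are declared:
#
#   parse("c ? x : y")   # Ite(c, x, y)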
class OpenArrayTypeTok(GrammarSymbol):
def __init__(self):
GrammarSymbol.__init__(self)
self.lbp = 5
def nud(self, parser):
idx_type = parser.expression()
parser.expect(ExprComma, ",")
el_type = parser.expression()
parser.expect(CloseBrace, "}")
if type(parser.token) == OpenPar:
parser.advance()
default = parser.expression()
parser.expect(ClosePar, ")")
return parser.mgr.Array(idx_type, default)
else:
return types.ArrayType(idx_type, el_type)
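# Sketch: "Array{Int, Real}" denotes an array type, while appending a
# parenthesized default value, as in "Array{Int, Real}(0.0)", denotes a
# constant array with that default:
#
#   parse("Array{Int, Real}(0.0)")   # Array(INT, Real(0))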
class OpenPar(GrammarSymbol):
def __init__(self):
GrammarSymbol.__init__(self)
self.lbp = 200
def nud(self, parser):
r = parser.expression()
if type(parser.token) != ClosePar:
raise SyntaxError("Expected ')', got '%s'" % parser.token)
parser.advance()
return r
def led(self, parser, left):
# a function call
fun = left
params = []
if type(parser.token) != ClosePar:
while True:
r = parser.expression()
params.append(r)
if type(parser.token) != ExprComma:
break
parser.advance()
parser.expect(ClosePar, ")")
return parser.mgr.Function(fun, params)
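# Sketch: a parenthesis in infix position is a function application, e.g.
# assuming f is a declared uninterpreted function symbol and x a symbol of
# matching type:
#
#   parse("f(x)")   # Function(f, [x])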
class OpenBrak(GrammarSymbol):
def __init__(self):
GrammarSymbol.__init__(self)
self.lbp = 300
def led(self, parser, left):
# BVExtract, Select or Store
op = left
e1 = parser.expression()
if type(parser.token) == ExprElse:
#BVExtract
parser.advance()
end = parser.expression()
parser.expect(CloseBrak, "]")
return parser.mgr.BVExtract(op,
e1.constant_value(),
end.constant_value())
elif type(parser.token) == CloseBrak:
# Select
parser.advance()
return parser.mgr.Select(op, e1)
elif type(parser.token) == ArrStore:
#Store
parser.advance()
e2 = parser.expression()
parser.expect(CloseBrak, "]")
return parser.mgr.Store(op, e1, e2)
else:
raise PysmtSyntaxError("Unexpected token:" + str(parser.token))
class Quantifier(GrammarSymbol):
def __init__(self, operator, lbp):
GrammarSymbol.__init__(self)
self.operator = operator
self.lbp = lbp
def nud(self, parser):
qvars = []
if type(parser.token) != ExprDot:
while True:
r = parser.expression()
qvars.append(r)
if type(parser.token) != ExprComma:
break
parser.advance()
parser.expect(ExprDot, ".")
matrix = parser.expression(self.lbp)
return self.operator(qvars, matrix)
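# Sketch: quantified variables are listed up to the dot, followed by the body,
# e.g. assuming Bool symbols x and y are declared:
#
#   parse("forall x, y . (x -> y)")   # ForAll([x, y], Implies(x, y))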
class PrattParser(object):
"""The Pratt-Parser
This parser is explained in:
http://effbot.org/zone/simple-top-down-parsing.htm
    The LexerClass argument is required and does the heavy lifting of
    tokenizing the input string.
"""
def __init__(self, LexerClass, env=None):
if env is None:
env = get_env()
self.env = env
self.mgr = env.formula_manager
self.get_type = env.stc.get_type
self.lexer = LexerClass(env)
self.token = None
self.tokenizer = None
def expression(self, rbp=0):
"""Parses an expression"""
t = self.token
self.token = next(self.tokenizer)
left = t.nud(self)
while rbp < self.token.lbp:
t = self.token
self.token = next(self.tokenizer)
left = t.led(self, left)
return left
def parse_fname(self, fname):
"""Parses the content of the given file name"""
with open(fname, "r") as f:
return self.parse(f.read())
def parse(self, string):
"""Parses the content of the given string"""
self.token = None
self.tokenizer = self.lexer.tokenize(string)
self.token = next(self.tokenizer)
result = self.expression()
try:
bd = next(self.tokenizer)
raise PysmtSyntaxError("Bogus data after expression: '%s' "
"(Partial: %s)" % (bd, result))
except StopIteration:
return result
def advance(self):
"""Advance reading of one token"""
self.token = next(self.tokenizer)
def expect(self, token_class, token_repr):
"""
        Check that the next token is the specified one or fail with a
        PysmtSyntaxError.
"""
if type(self.token) != token_class:
raise PysmtSyntaxError("Expected '%s'" % token_repr)
self.advance()
# EOC PrattParser
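# Sketch: PrattParser can be used directly with any Lexer subclass; HRParser
# above is just a convenience wrapper around PrattParser(HRLexer).
#
#   parser = PrattParser(HRLexer)
#   f = parser.parse("x | !x")   # assuming a Bool symbol x is declared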
#
# Adapters
#
# These are adapters used to create tokens for various types of symbols
#
class EndOfInput(GrammarSymbol):
pass
class UnaryOpAdapter(GrammarSymbol):
"""Adapter for unary operator."""
def __init__(self, operator, lbp):
GrammarSymbol.__init__(self)
self.operator = operator
self.lbp = lbp
def nud(self, parser):
right = parser.expression(self.lbp)
return self.operator(right)
class InfixOpAdapter(GrammarSymbol):
"""Adapter for infix operator."""
def __init__(self, operator, lbp):
GrammarSymbol.__init__(self)
self.operator = operator
self.lbp = lbp
def led(self, parser, left):
right = parser.expression(self.lbp)
return self.operator(left, right)
def __repr__(self):
return repr(self.operator)
class InfixOrUnaryOpAdapter(GrammarSymbol):
"""Adapter for operators that can be both binary infix or unary.
b_operator and b_lbp: define the behavior when considering the symbol
as a binary operator.
u_operator and u_lbp: define the behavior when considering the symbol
as a unary operator.
"""
def __init__(self, b_operator, u_operator, b_lbp, u_lbp):
GrammarSymbol.__init__(self)
self.b_operator = b_operator
self.u_operator = u_operator
self.lbp = b_lbp
self.u_lbp = u_lbp
def nud(self, parser):
right = parser.expression(self.u_lbp)
return self.u_operator(right)
def led(self, parser, left):
right = parser.expression(self.lbp)
return self.b_operator(left, right)
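# Sketch: '-' uses this adapter, acting as subtraction in infix position and
# as unary minus in prefix position, e.g. assuming Int symbols x, y:
#
#   parse("x - y")    # Minus(x, y)
#   parse("-x + y")   # Plus(Times(Int(-1), x), y)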
class FunctionCallAdapter(GrammarSymbol):
"""Adapter for function calls."""
def __init__(self, operator, lbp):
GrammarSymbol.__init__(self)
self.operator = operator
self.lbp = lbp
def nud(self, parser):
parser.advance() # OpenPar
params = []
if type(parser.token) != ClosePar:
while True:
r = parser.expression()
params.append(r)
if type(parser.token) != ExprComma:
break
parser.advance()
if type(parser.token) != ClosePar:
raise SyntaxError("Expected ')'")
parser.advance()
return self.operator(*params)
def __repr__(self):
return repr(self.operator)
class ClosePar(GrammarSymbol):
pass
class CloseBrak(GrammarSymbol):
pass
class ExprElse(GrammarSymbol):
pass
class ExprDot(GrammarSymbol):
pass
class ExprComma(GrammarSymbol):
pass