# Source code for pysmt.parsing

#
# This file is part of pySMT.
#
#   Copyright 2014 Andrea Micheli and Marco Gario
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#
import re
from collections import namedtuple

import pysmt.typing as types
from pysmt.environment import get_env
from pysmt.exceptions import PysmtSyntaxError, UndefinedSymbolError
from pysmt.constants import Fraction


[docs] def HRParser(env=None): """Parser for HR format of pySMT.""" return PrattParser(HRLexer, env=env)
[docs] def parse(string): """Parse an hr-string.""" return HRParser().parse(string)
# Rules for the Lexer Rule = namedtuple('Rule', ['regex', 'symbol', 'is_functional']) class Lexer(object): """This class produces a stream of token objects for the Pratt-parser. The rules for the token are store in self.rules. Call self.compile() after modifying rules. """ def __init__(self, env=None): if env is None: env = get_env() self.env = env self.mgr = env.formula_manager self.get_type = env.stc.get_type self.rules = [] self.scanner = None self.eoi = EndOfInput() def compile(self): self.scanner = re.compile("|".join(rule.regex for rule in self.rules), re.DOTALL | re.VERBOSE) def lexing_error(self, read): raise PysmtSyntaxError("Unexpected input: %s" % read) def tokenize(self, data): """The token generator for the given string""" for match in re.finditer(self.scanner, data): for idx, capture in enumerate(match.groups()): if capture is not None: rule = self.rules[idx] symbol = rule.symbol if rule.symbol is not None: if rule.is_functional: yield symbol(capture) else: yield symbol break yield self.eoi # EOC Lexer class GrammarSymbol(object): """Base class for all the parsing tokens""" def __init__(self, lbp=0, value=None, payload=None): self.lbp = lbp self.value = value self.payload = payload def nud(self, parser): raise PysmtSyntaxError("Syntax error at token '%s'." % parser.token) def led(self, parser, left): raise PysmtSyntaxError("Syntax error at token '%s' (Read: '%s')." 
% \ (parser.token, left)) # # Precedence table (from low priority to high priority): # # 10 : Xor, Implies, Iff # 20 : Forall, Exists # 30 : Or # 40 : And # 50 : Not # 60 : LE, LT, Equals, GE, GT, BVULE, BVUGE, BVULT, BVUGT, BVSLE, BVSGE, BVSGT # 70 : Plus, Minus, BVAdd, BVSub # 80 : Times, Div, BVMul, BVUDiv, BVSDiv, BVSRem, BVURem # 90 : BVLShl, BVLShr, BVAShr, BVConcat, BVXor, BVRor, BVRol, BVZExt, BVSExt, BVComp # 100 : ToReal Uminus BVNeg BVToNat # 200 : () # 300 : [] class HRLexer(Lexer): """Produces a stream of token objects for the Human-Readable format.""" def __init__(self, env=None): Lexer.__init__(self, env=env) hr_rules = [ Rule(r"(\s+)", None, False), # whitespace Rule(r"(-?\d+/\d+)", self.real_constant, True), # fractions Rule(r"(-?\d+\.\d+)", self.real_constant, True),# decimals Rule(r"(-?\d+_\d+)", self.bv_constant, True),# bv Rule(r"(-?\d+)", self.int_constant, True),# integer literals Rule(r"\"(.*?)\"", self.string_constant, True), # String Constant Rule(r"BV\{(\d+)\}", self.bv_type, True),# BV Type Rule(r"(Array\{)", OpenArrayTypeTok(), False),# Array Type Rule(r"(Int)", IntTypeTok(), False),# Int Type Rule(r"(Real)", RealTypeTok(), False),# Real Type Rule(r"(Bool)", BoolTypeTok(), False),# Bool Type Rule(r"(&)", InfixOpAdapter(self.AndOrBVAnd, 40), False),# conjunction Rule(r"(\|)", InfixOpAdapter(self.OrOrBVOr, 30), False),# disjunction Rule(r"(!)", UnaryOpAdapter(self.NotOrBVNot, 50), False),# negation Rule(r"(\()", OpenPar(), False),# open parenthesis Rule(r"(\))", ClosePar(), False),# closed parenthesis Rule(r"(\[)", OpenBrak(), False),# open parenthesis Rule(r"(\])", CloseBrak(), False),# closed parenthesis Rule(r"(\})", CloseBrace(), False),# closed parenthesis Rule(r"(<<)", InfixOpAdapter(self.mgr.BVLShl, 90), False),# Shl Rule(r"(>>)", InfixOpAdapter(self.mgr.BVLShr, 90), False),# Shr Rule(r"(a>>)", InfixOpAdapter(self.mgr.BVAShr, 90), False),# AShr Rule(r"(<->)", InfixOpAdapter(self.mgr.Iff, 10), False),# iff Rule(r"(->)", 
InfixOpAdapter(self.mgr.Implies, 10), False),# implies Rule(r"(u<=)", InfixOpAdapter(self.mgr.BVULE, 60), False),# bvule Rule(r"(u>=)", InfixOpAdapter(self.mgr.BVUGE, 60), False),# bvuge Rule(r"(u<)", InfixOpAdapter(self.mgr.BVULT, 60), False),# bvult Rule(r"(u>)", InfixOpAdapter(self.mgr.BVUGT, 60), False),# bvugt Rule(r"(s<=)", InfixOpAdapter(self.mgr.BVSLE, 60), False),# bvsle Rule(r"(s>=)", InfixOpAdapter(self.mgr.BVSGE, 60), False),# bvsge Rule(r"(s<)", InfixOpAdapter(self.mgr.BVSLT, 60), False),# bvslt Rule(r"(s>)", InfixOpAdapter(self.mgr.BVSGT, 60), False),# bvsgt Rule(r"(>=)", InfixOpAdapter(self.mgr.GE, 60), False),# ge Rule(r"(<=)", InfixOpAdapter(self.mgr.LE, 60), False),# le Rule(r"(>)", InfixOpAdapter(self.mgr.GT, 60), False),# gt Rule(r"(<)", InfixOpAdapter(self.mgr.LT, 60), False),# lt Rule(r"(=)", InfixOpAdapter(self.mgr.Equals, 60), False),# eq Rule(r"(\+)", InfixOpAdapter(self.PlusOrBVAdd, 70), False),# plus Rule(r"(-)", InfixOrUnaryOpAdapter(self.MinusOrBVSub, self.UMinusOrBvNeg, 70, 100), False),# minus Rule(r"(\*)", InfixOpAdapter(self.TimesOrBVMul, 80), False),# times Rule(r"(\^)", InfixOpAdapter(self.mgr.Pow, 80), False),# pow Rule(r"(u/)", InfixOpAdapter(self.mgr.BVUDiv, 80), False),# udiv Rule(r"(s/)", InfixOpAdapter(self.mgr.BVSDiv, 80), False),# sdiv Rule(r"(/)", InfixOpAdapter(self.mgr.Div, 80), False),# div Rule(r"(s%)", InfixOpAdapter(self.mgr.BVSRem, 80), False),# srem Rule(r"(u%)", InfixOpAdapter(self.mgr.BVURem, 80), False),# urem Rule(r"(\?)", ExprIf(), False), # question Rule(r"(:=)", ArrStore(), False),# ArrStore Rule(r"(::)", InfixOpAdapter(self.mgr.BVConcat, 90), False),# BVXor Rule(r"(:)", ExprElse(), False),# colon Rule(r"(False)", Constant(self.mgr.FALSE()), False), # False Rule(r"(True)", Constant(self.mgr.TRUE()), False),# True Rule(r"(,)", ExprComma(), False),# comma Rule(r"(\.)", ExprDot(), False),# dot Rule(r"(xor)", InfixOpAdapter(self.mgr.BVXor, 10), False),# BVXor Rule(r"(ROR)", 
InfixOpAdapter(self.BVHack(self.mgr.BVRor), 90), False),# BVRor Rule(r"(ROL)", InfixOpAdapter(self.BVHack(self.mgr.BVRol), 90), False),# BVRol Rule(r"(ZEXT)", InfixOpAdapter(self.BVHack(self.mgr.BVZExt), 90), False),# BVZext Rule(r"(SEXT)", InfixOpAdapter(self.BVHack(self.mgr.BVSExt), 90), False),# BVSext Rule(r"(bvcomp)", InfixOpAdapter(self.mgr.BVComp, 90), False),# Rule(r"(forall)", Quantifier(self.mgr.ForAll, 20), False),# Rule(r"(exists)", Quantifier(self.mgr.Exists, 20), False),# Rule(r"(ToReal)", UnaryOpAdapter(self.mgr.ToReal, 100), False),# Rule(r"(str\.len)", FunctionCallAdapter(self.mgr.StrLength, 100), False), # str_length Rule(r"(str\.\+\+)", FunctionCallAdapter(self.mgr.StrConcat, 100), False), # str_concat Rule(r"(str\.at)", FunctionCallAdapter(self.mgr.StrCharAt, 100), False), # str_charat Rule(r"(str\.contains)", FunctionCallAdapter(self.mgr.StrContains, 100), False), # str_contains Rule(r"(str\.indexof)", FunctionCallAdapter(self.mgr.StrIndexOf, 100), False), # str_indexof Rule(r"(str\.replace)", FunctionCallAdapter(self.mgr.StrReplace, 100), False), # str_replace Rule(r"(str\.substr)", FunctionCallAdapter(self.mgr.StrSubstr, 100), False), # str_substr Rule(r"(str\.prefixof)", FunctionCallAdapter(self.mgr.StrPrefixOf, 100), False), # str_prefixof Rule(r"(str\.suffixof)", FunctionCallAdapter(self.mgr.StrSuffixOf, 100), False), # str_suffixof Rule(r"(str\.to\.int)", FunctionCallAdapter(self.mgr.StrToInt, 100), False), # str_to_int Rule(r"(int\.to\.str)", FunctionCallAdapter(self.mgr.IntToStr, 100), False), # int_to_str Rule(r"(bv2nat)", UnaryOpAdapter(self.mgr.BVToNatural, 100), False),# Rule(r"'(.*?)'", self.identifier, True), # quoted identifiers Rule(r"([A-Za-z_][A-Za-z0-9_]*)", self.identifier, True),# identifiers Rule(r"(.)", self.lexing_error, True), # input error ] self.rules += hr_rules self.compile() def bv_type(self, read): return BVTypeTok(int(read)) def real_constant(self, read): return Constant(self.mgr.Real(Fraction(read))) def 
bv_constant(self, read): v, w = read.split("_") return Constant(self.mgr.BV(int(v), width=int(w))) def int_constant(self, read): return Constant(self.mgr.Int(int(read))) def string_constant(self, read): return Constant(self.mgr.String(read)) def identifier(self, read): return Identifier(read, env=self.env) def UMinusOrBvNeg(self, x): ty = self.get_type(x) if ty.is_int_type(): return self.mgr.Times(self.mgr.Int(-1), x) elif ty.is_real_type(): return self.mgr.Times(self.mgr.Real(-1), x) else: return self.mgr.BVNeg(x) def AndOrBVAnd(self, l, r): if self.get_type(l).is_bool_type(): return self.mgr.And(l, r) else: return self.mgr.BVAnd(l, r) def OrOrBVOr(self, l, r): if self.get_type(l).is_bool_type(): return self.mgr.Or(l, r) else: return self.mgr.BVOr(l, r) def NotOrBVNot(self, x): if self.get_type(x).is_bool_type(): return self.mgr.Not(x) else: return self.mgr.BVNot(x) def PlusOrBVAdd(self, l, r): if self.get_type(l).is_bv_type(): return self.mgr.BVAdd(l, r) else: return self.mgr.Plus(l, r) def MinusOrBVSub(self, l, r): if self.get_type(l).is_bv_type(): return self.mgr.BVSub(l, r) else: return self.mgr.Minus(l, r) def TimesOrBVMul(self, l, r): if self.get_type(l).is_bv_type(): return self.mgr.BVMul(l, r) else: return self.mgr.Times(l, r) def BVHack(self, op): def _res(a, b): if b.is_constant(): return op(a, b.constant_value()) else: raise PysmtSyntaxError("Constant expected, got '%s'" % b) return _res # EOC HRLexer # # Grammar tokens # # Placeholder tokens class CloseBrace(GrammarSymbol): pass class ArrStore(GrammarSymbol): pass class BVTypeTok(GrammarSymbol): def __init__(self, width): GrammarSymbol.__init__(self) self.width = width def nud(self, parser): return types.BVType(self.width) class IntTypeTok(GrammarSymbol): def nud(self, parser): return types.INT class RealTypeTok(GrammarSymbol): def nud(self, parser): return types.REAL class BoolTypeTok(GrammarSymbol): def nud(self, parser): return types.BOOL class Constant(GrammarSymbol): def __init__(self, value): 
GrammarSymbol.__init__(self) self.value = value def nud(self, parser): return self.value class Identifier(GrammarSymbol): def __init__(self, name, env): GrammarSymbol.__init__(self) self.value = env.formula_manager.get_symbol(name) if self.value is None: raise UndefinedSymbolError(name) def nud(self, parser): return self.value class ExprIf(GrammarSymbol): def __init__(self): GrammarSymbol.__init__(self) self.lbp = 5 def led(self, parser, left): cond_ = left then_ = parser.expression(self.lbp) parser.expect(ExprElse, ':') else_ = parser.expression(self.lbp) return parser.mgr.Ite(cond_, then_, else_) class OpenArrayTypeTok(GrammarSymbol): def __init__(self): GrammarSymbol.__init__(self) self.lbp = 5 def nud(self, parser): idx_type = parser.expression() parser.expect(ExprComma, ",") el_type = parser.expression() parser.expect(CloseBrace, "}") if type(parser.token) == OpenPar: parser.advance() default = parser.expression() parser.expect(ClosePar, ")") return parser.mgr.Array(idx_type, default) else: return types.ArrayType(idx_type, el_type) class OpenPar(GrammarSymbol): def __init__(self): GrammarSymbol.__init__(self) self.lbp = 200 def nud(self, parser): r = parser.expression() if type(parser.token) != ClosePar: raise SyntaxError("Expected ')', got '%s'" % parser.token) parser.advance() return r def led(self, parser, left): # a function call fun = left params = [] if type(parser.token) != ClosePar: while True: r = parser.expression() params.append(r) if type(parser.token) != ExprComma: break parser.advance() parser.expect(ClosePar, ")") return parser.mgr.Function(fun, params) class OpenBrak(GrammarSymbol): def __init__(self): GrammarSymbol.__init__(self) self.lbp = 300 def led(self, parser, left): # BVExtract, Select or Store op = left e1 = parser.expression() if type(parser.token) == ExprElse: #BVExtract parser.advance() end = parser.expression() parser.expect(CloseBrak, "]") return parser.mgr.BVExtract(op, e1.constant_value(), end.constant_value()) elif 
type(parser.token) == CloseBrak: # Select parser.advance() return parser.mgr.Select(op, e1) elif type(parser.token) == ArrStore: #Store parser.advance() e2 = parser.expression() parser.expect(CloseBrak, "]") return parser.mgr.Store(op, e1, e2) else: raise PysmtSyntaxError("Unexpected token:" + str(parser.token)) class Quantifier(GrammarSymbol): def __init__(self, operator, lbp): GrammarSymbol.__init__(self) self.operator = operator self.lbp = lbp def nud(self, parser): qvars = [] if type(parser.token) != ExprDot: while True: r = parser.expression() qvars.append(r) if type(parser.token) != ExprComma: break parser.advance() parser.expect(ExprDot, ".") matrix = parser.expression(self.lbp) return self.operator(qvars, matrix) class PrattParser(object): """The Pratt-Parser This parser is explained in: http://effbot.org/zone/simple-top-down-parsing.htm The LexerClass is required, and is the one doing the heavy lifting. """ def __init__(self, LexerClass, env=None): if env is None: env = get_env() self.env = env self.mgr = env.formula_manager self.get_type = env.stc.get_type self.lexer = LexerClass(env) self.token = None self.tokenizer = None def expression(self, rbp=0): """Parses an expression""" t = self.token self.token = next(self.tokenizer) left = t.nud(self) while rbp < self.token.lbp: t = self.token self.token = next(self.tokenizer) left = t.led(self, left) return left def parse_fname(self, fname): """Parses the content of the given file name""" with open(fname, "r") as f: return self.parse(f.read()) def parse(self, string): """Parses the content of the given string""" self.token = None self.tokenizer = self.lexer.tokenize(string) self.token = next(self.tokenizer) result = self.expression() try: bd = next(self.tokenizer) raise PysmtSyntaxError("Bogus data after expression: '%s' " "(Partial: %s)" % (bd, result)) except StopIteration: return result def advance(self): """Advance reading of one token""" self.token = next(self.tokenizer) def expect(self, token_class, 
token_repr): """ Check that the next token is the specified one or fail with a ParserError """ if type(self.token) != token_class: raise PysmtSyntaxError("Expected '%s'" % token_repr) self.advance() # EOC PrattParser # # Adapters # # These are adapters used to create tokens for various types of symbols # class EndOfInput(GrammarSymbol): pass class UnaryOpAdapter(GrammarSymbol): """Adapter for unary operator.""" def __init__(self, operator, lbp): GrammarSymbol.__init__(self) self.operator = operator self.lbp = lbp def nud(self, parser): right = parser.expression(self.lbp) return self.operator(right) class InfixOpAdapter(GrammarSymbol): """Adapter for infix operator.""" def __init__(self, operator, lbp): GrammarSymbol.__init__(self) self.operator = operator self.lbp = lbp def led(self, parser, left): right = parser.expression(self.lbp) return self.operator(left, right) def __repr__(self): return repr(self.operator) class InfixOrUnaryOpAdapter(GrammarSymbol): """Adapter for operators that can be both binary infix or unary. b_operator and b_lbp: define the behavior when considering the symbol as a binary operator. u_operator and u_lbp: define the behavior when considering the symbol as a unary operator. 
""" def __init__(self, b_operator, u_operator, b_lbp, u_lbp): GrammarSymbol.__init__(self) self.b_operator = b_operator self.u_operator = u_operator self.lbp = b_lbp self.u_lbp = u_lbp def nud(self, parser): right = parser.expression(self.u_lbp) return self.u_operator(right) def led(self, parser, left): right = parser.expression(self.lbp) return self.b_operator(left, right) class FunctionCallAdapter(GrammarSymbol): """Adapter for function calls.""" def __init__(self, operator, lbp): GrammarSymbol.__init__(self) self.operator = operator self.lbp = lbp def nud(self, parser): parser.advance() # OpenPar params = [] if type(parser.token) != ClosePar: while True: r = parser.expression() params.append(r) if type(parser.token) != ExprComma: break parser.advance() if type(parser.token) != ClosePar: raise SyntaxError("Expected ')'") parser.advance() return self.operator(*params) def __repr__(self): return repr(self.operator) class ClosePar(GrammarSymbol): pass class CloseBrak(GrammarSymbol): pass class ExprElse(GrammarSymbol): pass class ExprDot(GrammarSymbol): pass class ExprComma(GrammarSymbol): pass