Add query-language AST, parser, tokenizer, and parsing-error modules

This commit is contained in:
Jann Stute
2024-11-27 20:42:57 +01:00
parent 20f93719d7
commit 7981d13274
4 changed files with 330 additions and 0 deletions

View File

@@ -0,0 +1,58 @@
from enum import Enum
from typing import Union
class ConstraintType(Enum):
    """Kind of search constraint a query prefix introduces (e.g. "tag:")."""

    Tag = 0
    MediaType = 1

    @staticmethod
    def from_string(text: str) -> Union["ConstraintType", None]:
        """Map a case-insensitive prefix like "tag" to its enum member.

        Returns None for unrecognized text so the caller (the tokenizer)
        can report the error with position information.

        BUG FIX: the return annotation previously claimed a ConstraintType
        is always returned, but unknown text yields None.
        """
        return {
            "tag": ConstraintType.Tag,
            "mediatype": ConstraintType.MediaType
        }.get(text.lower(), None)
class AST:
    """Base class for all query-AST nodes.

    Provides a generic debug representation that renders every instance
    attribute, so subclasses get readable output for free.
    """

    def __str__(self) -> str:
        # Render all instance attributes in insertion order.
        attrs = ", ".join(f"{name}={val}" for name, val in vars(self).items())
        return f"{type(self).__name__}({attrs})"

    def __repr__(self) -> str:
        return str(self)
class ANDList(AST):
    """Conjunction node: every element must match."""

    # The OR-groups joined by (implicit or explicit) AND.
    elements: list["ORList"]

    def __init__(self, elements: list["ORList"]) -> None:
        super().__init__()
        self.elements = elements
class ORList(AST):
    """Disjunction node: at least one term must match."""

    # The alternatives joined by "OR".
    terms: list[Union[ANDList, "Constraint"]]

    def __init__(self, terms: list[Union[ANDList, "Constraint"]]) -> None:
        super().__init__()
        self.terms = terms
class Constraint(AST):
    """Leaf node: a single tag/mediatype test, optionally refined by
    key=value properties (e.g. tag:test[parent=Color])."""

    type: ConstraintType
    value: str
    properties: list["Property"]

    # NOTE: assignment order is kept (type, value, properties) because the
    # inherited __str__ renders attributes in insertion order.
    def __init__(self, type: ConstraintType, value: str, properties: list["Property"]) -> None:
        super().__init__()
        self.type = type
        self.value = value
        self.properties = properties
class Property(AST):
    """A key=value refinement attached to a Constraint."""

    key: str
    value: str

    def __init__(self, key: str, value: str) -> None:
        super().__init__()
        self.key = key
        self.value = value

View File

@@ -0,0 +1,104 @@
from src.core.query_lang.ast import AST, ANDList, Constraint, ORList, Property
from src.core.query_lang.tokenizer import ConstraintType, Token, Tokenizer, TokenType
from src.core.query_lang.util import ParsingError
class Parser:
    """Recursive-descent parser for the query language.

    Grammar (EBNF-ish):
        or_list    : and_list ( "OR" and_list )*
        and_list   : term ( [ "AND" ] term )*
        term       : "(" or_list ")" | constraint
        constraint : [ CONSTRAINTTYPE ] literal [ "[" property ( "," property )* "]" ]
        property   : ULITERAL "=" literal
        literal    : QLITERAL | ULITERAL
    """

    text: str
    tokenizer: Tokenizer
    # Single-token lookahead.
    next_token: Token
    # Constraints without an explicit "tag:"/"mediatype:" prefix reuse the
    # most recently parsed constraint type; the initial default is Tag.
    last_constraint_type: ConstraintType = ConstraintType.Tag

    def __init__(self, text: str) -> None:
        self.text = text
        self.tokenizer = Tokenizer(self.text)
        self.next_token = self.tokenizer.get_next_token()

    def parse(self) -> AST:
        """Parse the entire input and return the AST root.

        Raises ParsingError if tokens remain after a complete expression.
        """
        out = self.__or_list()
        if self.next_token.type != TokenType.EOF:
            raise ParsingError(self.next_token.start, self.next_token.end, "Syntax Error")
        return out

    def __and_list(self) -> ANDList:
        """and_list : term ( [ "AND" ] term )* — the AND keyword is optional."""
        elements = [self.__term()]
        while self.next_token.type != TokenType.EOF and not self.__is_next_or():
            self.__skip_and()
            elements.append(self.__term())
        return ANDList(elements)

    def __skip_and(self) -> None:
        """Consume at most one optional "AND"; two in a row is an error."""
        if self.__is_next_and():
            self.__eat(TokenType.ULITERAL)
            if self.__is_next_and():
                # __syntax_error raises itself; the original's extra
                # "raise" before this call was dead code.
                self.__syntax_error("Unexpected AND")

    def __is_next_and(self) -> bool:
        return self.next_token.type == TokenType.ULITERAL and self.next_token.value.upper() == "AND"

    def __or_list(self) -> ORList:
        """or_list : and_list ( "OR" and_list )*"""
        terms = [self.__and_list()]
        while self.__is_next_or():
            self.__eat(TokenType.ULITERAL)  # consume the "OR" keyword
            terms.append(self.__and_list())
        return ORList(terms)

    def __is_next_or(self) -> bool:
        return self.next_token.type == TokenType.ULITERAL and self.next_token.value.upper() == "OR"

    def __term(self) -> AST:
        """term : "(" or_list ")" | constraint"""
        if self.next_token.type == TokenType.RBRACKETO:
            self.__eat(TokenType.RBRACKETO)
            # BUG FIX: this called __and_list(), which made "(a OR b)" a
            # syntax error; a parenthesized group must allow OR expressions.
            out = self.__or_list()
            self.__eat(TokenType.RBRACKETC)
            return out
        else:
            return self.__constraint()

    def __constraint(self) -> Constraint:
        """constraint : [CONSTRAINTTYPE] literal ["[" property ("," property)* "]"]"""
        if self.next_token.type == TokenType.CONSTRAINTTYPE:
            self.last_constraint_type = self.__eat(TokenType.CONSTRAINTTYPE).value
        value = self.__literal()
        properties = []
        if self.next_token.type == TokenType.SBRACKETO:
            self.__eat(TokenType.SBRACKETO)
            properties.append(self.__property())
            while self.next_token.type == TokenType.COMMA:
                self.__eat(TokenType.COMMA)
                properties.append(self.__property())
            self.__eat(TokenType.SBRACKETC)
        return Constraint(self.last_constraint_type, value, properties)

    def __property(self) -> Property:
        """property : ULITERAL "=" literal"""
        key = self.__eat(TokenType.ULITERAL).value
        self.__eat(TokenType.EQUALS)
        value = self.__literal()
        return Property(key, value)

    def __literal(self) -> str:
        """literal : QLITERAL | ULITERAL"""
        if self.next_token.type in (TokenType.QLITERAL, TokenType.ULITERAL):
            return self.__eat(self.next_token.type).value
        # BUG FIX: previously fell through and silently returned None for
        # any other token type; now reports a syntax error.
        self.__syntax_error(f"expected literal found {self.next_token.type}")

    def __eat(self, type: TokenType) -> Token:
        """Consume and return the lookahead token, which must be of *type*."""
        if self.next_token.type != type:
            self.__syntax_error(f"expected {type} found {self.next_token.type}")
        out = self.next_token
        self.next_token = self.tokenizer.get_next_token()
        return out

    def __syntax_error(self, msg: str = "Syntax Error") -> None:
        """Raise a ParsingError located at the current lookahead token."""
        raise ParsingError(self.next_token.start, self.next_token.end, msg)
# Ad-hoc manual smoke test of the parser on a representative query.
if __name__ == "__main__": # TODO remove
    print("") # noqa: T201
    p = Parser("Mario AND Luigi tag:test[parent=Color,color=red] OR mediatype:test")
    print(p.parse()) # noqa: T201

View File

@@ -0,0 +1,153 @@
from enum import Enum
from src.core.query_lang.ast import ConstraintType
from src.core.query_lang.util import ParsingError
class TokenType(Enum):
    """Categories of lexemes produced by the query-language Tokenizer."""

    EOF = -1
    QLITERAL = 0 # Quoted Literal
    ULITERAL = 1 # Unquoted Literal (does not contain ":", " ", "[", "]", "(", ")", "=", ",")
    RBRACKETO = 2 # Round Bracket Open
    RBRACKETC = 3 # Round Bracket Close
    SBRACKETO = 4 # Square Bracket Open
    SBRACKETC = 5 # Square Bracket Close
    CONSTRAINTTYPE = 6 # e.g. "tag:" / "mediatype:"; token value is a ConstraintType
    COLON = 10
    COMMA = 11
    EQUALS = 12
class Token:
    """A single lexeme plus its (inclusive) character span in the input.

    start/end are None for synthetic tokens (e.g. EOF) that have no
    source position.
    """

    type: TokenType
    # Payload depends on the token type: str for literals, ConstraintType
    # for CONSTRAINTTYPE tokens, None for pure punctuation.
    # BUG FIX: was annotated "any" (the builtin function), not a type.
    value: object
    start: "int | None"
    end: "int | None"

    def __init__(self, type: TokenType, value: object, start: "int | None" = None, end: "int | None" = None) -> None:
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    @staticmethod
    def from_type(type: TokenType, pos: "int | None" = None) -> "Token":
        """Build a value-less, single-position token (brackets, commas, ...).

        BUG FIX: the return annotation previously claimed TokenType, but a
        Token is returned.
        """
        return Token(type, None, pos, pos)

    @staticmethod
    def EOF() -> "Token": # noqa: N802
        """Sentinel token marking the end of input."""
        return Token.from_type(TokenType.EOF)

    def __str__(self) -> str:
        return f"Token({self.type}, {self.value}, {self.start}, {self.end})"

    def __repr__(self) -> str:
        return self.__str__()
class Tokenizer:
    """Streams Tokens from a query string, one per get_next_token() call."""

    text: str
    pos: int
    # Character at self.pos, or None once the input is exhausted.
    current_char: str
    # Characters that may follow a backslash inside a quoted literal.
    # BUG FIX: the list contained '"' twice; "'" (the other accepted quote
    # character) was missing.
    ESCAPABLE_CHARS = ["\\", '"', "'"]
    # Characters that terminate an unquoted literal.
    NOT_IN_ULITERAL = [":", " ", "[", "]", "(", ")", "=", ","]

    def __init__(self, text: str) -> None:
        self.text = text
        self.pos = 0
        # BUG FIX: empty input used to raise IndexError here.
        self.current_char = self.text[self.pos] if self.text else None

    def get_next_token(self) -> Token:
        """Return the next token, or an EOF token once input is exhausted."""
        self.__skip_whitespace()
        if self.current_char is None:
            return Token.EOF()
        if self.current_char in ("'", '"'):
            return self.__quoted_string()
        elif self.current_char == "(":
            self.__advance()
            return Token.from_type(TokenType.RBRACKETO, self.pos - 1)
        elif self.current_char == ")":
            self.__advance()
            return Token.from_type(TokenType.RBRACKETC, self.pos - 1)
        elif self.current_char == "[":
            self.__advance()
            return Token.from_type(TokenType.SBRACKETO, self.pos - 1)
        elif self.current_char == "]":
            self.__advance()
            return Token.from_type(TokenType.SBRACKETC, self.pos - 1)
        elif self.current_char == ",":
            self.__advance()
            return Token.from_type(TokenType.COMMA, self.pos - 1)
        elif self.current_char == "=":
            self.__advance()
            return Token.from_type(TokenType.EQUALS, self.pos - 1)
        else:
            return self.__unquoted_string_or_constraint_type()

    def __unquoted_string_or_constraint_type(self) -> Token:
        """Lex an ULITERAL, or a CONSTRAINTTYPE when a ":" follows it."""
        out = ""
        start = self.pos
        while self.current_char not in self.NOT_IN_ULITERAL and self.current_char is not None:
            out += self.current_char
            self.__advance()
        end = self.pos - 1
        if self.current_char == ":":
            if len(out) == 0:
                raise ParsingError(self.pos, self.pos)
            self.__advance()
            constraint_type = ConstraintType.from_string(out)
            if constraint_type is None:
                # BUG FIX: message previously read "ContraintType".
                raise ParsingError(start, end, f"Invalid ConstraintType \"{out}\"")
            return Token(TokenType.CONSTRAINTTYPE, constraint_type, start, end)
        else:
            return Token(TokenType.ULITERAL, out, start, end)

    def __quoted_string(self) -> Token:
        """Lex a QLITERAL delimited by the quote char at the current position.

        Backslash escapes the characters in ESCAPABLE_CHARS; any other
        backslash sequence is kept literally.
        """
        start = self.pos
        quote = self.current_char
        self.__advance()
        escape = False
        out = ""
        while escape or self.current_char != quote:
            # BUG FIX: an unterminated literal used to spin until a
            # TypeError (str + None); now it is a proper ParsingError.
            if self.current_char is None:
                raise ParsingError(start, self.pos, "Unterminated quoted literal")
            if escape:
                escape = False
                if self.current_char not in Tokenizer.ESCAPABLE_CHARS:
                    # BUG FIX: unrecognized escapes kept the backslash but
                    # dropped the following character; keep both.
                    out += "\\"
                out += self.current_char
                self.__advance()
                continue
            if self.current_char == "\\":
                escape = True
            else:
                out += self.current_char
            self.__advance()
        end = self.pos
        self.__advance()  # consume the closing quote
        return Token(TokenType.QLITERAL, out, start, end)

    def __advance(self) -> None:
        """Move to the next character; current_char becomes None at end of input."""
        if self.pos < len(self.text) - 1:
            self.pos += 1
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

    def __skip_whitespace(self) -> None:
        # BUG FIX: trailing whitespace used to crash with AttributeError
        # ("None.isspace()") once the input was exhausted mid-skip.
        while self.current_char is not None and self.current_char.isspace():
            self.__advance()
# Ad-hoc manual smoke test: print every token of a representative query.
if __name__ == "__main__": # TODO remove
    t = Tokenizer("Mario AND Luigi tag:test[parent=Color,color=red]")
    last = Token(None, None)
    while last.type != TokenType.EOF:
        last = t.get_next_token()
        print(last) # noqa: T201

View File

@@ -0,0 +1,15 @@
class ParsingError(Exception):
    """Raised when the query tokenizer/parser rejects its input.

    Carries the (inclusive) character span of the offending input so
    callers can point at the problem.

    BUG FIX: previously derived from BaseException, which escapes generic
    "except Exception" boundaries; a recoverable parse failure is an
    ordinary error and belongs under Exception.
    """

    start: int
    end: int
    msg: str

    def __init__(self, start: int, end: int, msg: str = "Syntax Error") -> None:
        super().__init__(msg)  # cooperate with the Exception machinery
        self.start = start
        self.end = end
        self.msg = msg

    def __str__(self) -> str:
        return f"Syntax Error {self.start}->{self.end}: {self.msg}"

    def __repr__(self) -> str:
        return self.__str__()