mirror of
https://github.com/TagStudioDev/TagStudio.git
synced 2026-02-02 08:09:13 +00:00
add files
This commit is contained in:
58
tagstudio/src/core/query_lang/ast.py
Normal file
58
tagstudio/src/core/query_lang/ast.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from enum import Enum
|
||||
from typing import Union
|
||||
|
||||
|
||||
class ConstraintType(Enum):
    """Kind of filter a query constraint applies (e.g. ``tag:`` or ``mediatype:``)."""

    Tag = 0
    MediaType = 1

    @staticmethod
    def from_string(text: str) -> Union["ConstraintType", None]:
        """Map a case-insensitive prefix string to its ConstraintType.

        Returns:
            The matching ConstraintType, or None when ``text`` names no
            known constraint type (the annotation previously claimed a
            non-optional return, but callers rely on the None).
        """
        return {
            "tag": ConstraintType.Tag,
            "mediatype": ConstraintType.MediaType,
        }.get(text.lower(), None)
|
||||
|
||||
class AST:
    """Base node of the query-language syntax tree.

    Provides a generic string form listing every instance attribute,
    e.g. ``NodeName(field=value, ...)``.
    """

    def __str__(self):
        attrs = ", ".join(f"{name}={val}" for name, val in vars(self).items())
        return f"{self.__class__.__name__}({attrs})"

    def __repr__(self) -> str:
        return str(self)
|
||||
|
||||
class ANDList(AST):
    """AST node for a conjunction: every element must match."""

    # Child nodes combined with AND semantics.
    elements: list["ORList"]

    def __init__(self, elements: list["ORList"]) -> None:
        super().__init__()
        self.elements = elements
|
||||
|
||||
class ORList(AST):
    """AST node for a disjunction: at least one term must match."""

    # Child nodes combined with OR semantics.
    terms: list[Union[ANDList, "Constraint"]]

    def __init__(self, terms: list[Union[ANDList, "Constraint"]]) -> None:
        super().__init__()
        self.terms = terms
|
||||
|
||||
class Constraint(AST):
    """AST leaf: one filter consisting of a type, a value, and optional properties."""

    type: ConstraintType
    value: str
    properties: list["Property"]

    def __init__(self, type: ConstraintType, value: str, properties: list["Property"]) -> None:
        super().__init__()
        self.type = type
        self.value = value
        self.properties = properties
|
||||
|
||||
class Property(AST):
    """AST leaf: a single ``key=value`` entry inside a constraint's ``[...]`` list."""

    key: str
    value: str

    def __init__(self, key: str, value: str) -> None:
        super().__init__()
        self.key = key
        self.value = value
|
||||
104
tagstudio/src/core/query_lang/parser.py
Normal file
104
tagstudio/src/core/query_lang/parser.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from src.core.query_lang.ast import AST, ANDList, Constraint, ORList, Property
|
||||
from src.core.query_lang.tokenizer import ConstraintType, Token, Tokenizer, TokenType
|
||||
from src.core.query_lang.util import ParsingError
|
||||
|
||||
|
||||
class Parser:
    """Recursive-descent parser turning query text into an AST.

    Grammar (informal):
        or_list    : and_list ("OR" and_list)*
        and_list   : term (["AND"] term)*
        term       : "(" and_list ")" | constraint
        constraint : [CONSTRAINTTYPE] literal ["[" property ("," property)* "]"]
        property   : ULITERAL "=" literal

    Raises ParsingError (via the tokenizer or __syntax_error) on malformed input.
    """

    text: str
    tokenizer: Tokenizer
    next_token: Token  # single-token lookahead

    # A constraint written without an explicit type reuses the most recently
    # seen type; plain tag constraints are the default.
    last_constraint_type: ConstraintType = ConstraintType.Tag

    def __init__(self, text: str) -> None:
        self.text = text
        self.tokenizer = Tokenizer(self.text)
        self.next_token = self.tokenizer.get_next_token()

    def parse(self) -> AST:
        """Parse the whole input; raise ParsingError if any tokens are left over."""
        out = self.__or_list()
        if self.next_token.type != TokenType.EOF:
            raise ParsingError(self.next_token.start, self.next_token.end, "Syntax Error")
        return out

    def __and_list(self) -> ANDList:
        elements = [self.__term()]
        # BUG FIX: the loop previously ran until EOF/OR only, so a closing ")"
        # was fed back into __term() -> __constraint() -> __literal(), which
        # consumed nothing and returned None — an infinite loop of empty
        # constraints for any parenthesized query. Stop as soon as the
        # lookahead can neither continue nor begin a term.
        while self.__is_next_and() or self.__is_term_start():
            self.__skip_and()
            elements.append(self.__term())
        return ANDList(elements)

    def __is_term_start(self) -> bool:
        """True if the lookahead token can begin a term (OR never starts a term)."""
        if self.__is_next_or():
            return False
        return self.next_token.type in (
            TokenType.RBRACKETO,
            TokenType.CONSTRAINTTYPE,
            TokenType.QLITERAL,
            TokenType.ULITERAL,
        )

    def __skip_and(self) -> None:
        """Consume one optional AND keyword; reject a doubled AND."""
        if self.__is_next_and():
            self.__eat(TokenType.ULITERAL)

        if self.__is_next_and():
            # __syntax_error always raises; the former "raise" prefix was dead code.
            self.__syntax_error("Unexpected AND")

    def __is_next_and(self) -> bool:
        return self.next_token.type == TokenType.ULITERAL and self.next_token.value.upper() == "AND"

    def __or_list(self) -> ORList:
        terms = [self.__and_list()]

        while self.__is_next_or():
            self.__eat(TokenType.ULITERAL)
            terms.append(self.__and_list())

        return ORList(terms)

    def __is_next_or(self) -> bool:
        return self.next_token.type == TokenType.ULITERAL and self.next_token.value.upper() == "OR"

    def __term(self) -> AST:
        """term : "(" and_list ")" | constraint"""
        if self.next_token.type == TokenType.RBRACKETO:
            self.__eat(TokenType.RBRACKETO)
            out = self.__and_list()
            self.__eat(TokenType.RBRACKETC)
            return out
        else:
            return self.__constraint()

    def __constraint(self) -> Constraint:
        """constraint : [CONSTRAINTTYPE] literal ["[" property ("," property)* "]"]"""
        if self.next_token.type == TokenType.CONSTRAINTTYPE:
            self.last_constraint_type = self.__eat(TokenType.CONSTRAINTTYPE).value

        value = self.__literal()

        properties = []
        if self.next_token.type == TokenType.SBRACKETO:
            self.__eat(TokenType.SBRACKETO)
            properties.append(self.__property())

            while self.next_token.type == TokenType.COMMA:
                self.__eat(TokenType.COMMA)
                properties.append(self.__property())

            self.__eat(TokenType.SBRACKETC)

        return Constraint(self.last_constraint_type, value, properties)

    def __property(self) -> Property:
        """property : ULITERAL "=" literal"""
        key = self.__eat(TokenType.ULITERAL).value
        self.__eat(TokenType.EQUALS)
        value = self.__literal()
        return Property(key, value)

    def __literal(self) -> str:
        if self.next_token.type in [TokenType.QLITERAL, TokenType.ULITERAL]:
            return self.__eat(self.next_token.type).value
        # BUG FIX: previously fell through and silently returned None here,
        # yielding constraints with a None value instead of reporting an error.
        self.__syntax_error(f"expected literal found {self.next_token.type}")

    def __eat(self, type: TokenType) -> Token:
        """Consume and return the lookahead token, which must have the given type."""
        if self.next_token.type != type:
            self.__syntax_error(f"expected {type} found {self.next_token.type}")
        out = self.next_token
        self.next_token = self.tokenizer.get_next_token()
        return out

    def __syntax_error(self, msg: str = "Syntax Error") -> None:
        # Always raises; annotated -> None because it never returns normally.
        raise ParsingError(self.next_token.start, self.next_token.end, msg)
|
||||
|
||||
if __name__ == "__main__":  # TODO remove
    # Ad-hoc manual smoke test for the parser.
    print("")  # noqa: T201
    demo = Parser("Mario AND Luigi tag:test[parent=Color,color=red] OR mediatype:test")
    print(demo.parse())  # noqa: T201
|
||||
153
tagstudio/src/core/query_lang/tokenizer.py
Normal file
153
tagstudio/src/core/query_lang/tokenizer.py
Normal file
@@ -0,0 +1,153 @@
|
||||
from enum import Enum
|
||||
|
||||
from src.core.query_lang.ast import ConstraintType
|
||||
from src.core.query_lang.util import ParsingError
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Lexical token categories produced by the Tokenizer."""

    EOF = -1
    QLITERAL = 0  # quoted literal
    ULITERAL = 1  # unquoted literal; cannot contain ":", " ", "[", "]", "(", ")", "=", ","
    RBRACKETO = 2  # round bracket open "("
    RBRACKETC = 3  # round bracket close ")"
    SBRACKETO = 4  # square bracket open "["
    SBRACKETC = 5  # square bracket close "]"
    CONSTRAINTTYPE = 6
    COLON = 10
    COMMA = 11
    EQUALS = 12
|
||||
|
||||
class Token:
    """A single lexical token together with its source span."""

    type: TokenType
    # Payload: str for literals, ConstraintType for CONSTRAINTTYPE tokens,
    # None for punctuation/EOF. (Was annotated with the builtin ``any``,
    # which is a function, not a type.)
    value: object
    # Inclusive character offsets into the source text; None for EOF.
    start: "int | None"
    end: "int | None"

    def __init__(
        self, type: TokenType, value: object, start: "int | None" = None, end: "int | None" = None
    ) -> None:
        self.type = type
        self.value = value
        self.start = start
        self.end = end

    @staticmethod
    def from_type(type: TokenType, pos: "int | None" = None) -> "Token":
        """Build a value-less token of the given type at a single position.

        BUG FIX: the return annotation previously claimed TokenType, but a
        Token is returned.
        """
        return Token(type, None, pos, pos)

    @staticmethod
    def EOF() -> "Token":  # noqa: N802
        """Sentinel token marking end of input."""
        return Token.from_type(TokenType.EOF)

    def __str__(self) -> str:
        return f"Token({self.type}, {self.value}, {self.start}, {self.end})"

    def __repr__(self) -> str:
        return self.__str__()
|
||||
|
||||
class Tokenizer:
    """Splits query text into Tokens; call get_next_token() until it yields EOF."""

    text: str
    pos: int
    current_char: "str | None"  # None once the input is exhausted

    # Characters that may follow a backslash inside a quoted literal.
    # BUG FIX: the list previously contained '"' twice and never "'", so a
    # single quote could not be escaped inside 'single-quoted' literals.
    ESCAPABLE_CHARS = ["\\", '"', "'"]
    NOT_IN_ULITERAL = [":", " ", "[", "]", "(", ")", "=", ","]

    def __init__(self, text: str) -> None:
        self.text = text
        self.pos = 0
        # BUG FIX: empty input previously raised IndexError here.
        self.current_char = self.text[self.pos] if self.text else None

    def get_next_token(self) -> Token:
        """Return the next token, or the EOF sentinel when input is exhausted.

        Raises ParsingError on malformed input (bad constraint type,
        unterminated quoted literal).
        """
        self.__skip_whitespace()
        if self.current_char is None:
            return Token.EOF()

        if self.current_char in ("'", '"'):
            return self.__quoted_string()
        elif self.current_char == "(":
            self.__advance()
            return Token.from_type(TokenType.RBRACKETO, self.pos - 1)
        elif self.current_char == ")":
            self.__advance()
            return Token.from_type(TokenType.RBRACKETC, self.pos - 1)
        elif self.current_char == "[":
            self.__advance()
            return Token.from_type(TokenType.SBRACKETO, self.pos - 1)
        elif self.current_char == "]":
            self.__advance()
            return Token.from_type(TokenType.SBRACKETC, self.pos - 1)
        elif self.current_char == ",":
            self.__advance()
            return Token.from_type(TokenType.COMMA, self.pos - 1)
        elif self.current_char == "=":
            self.__advance()
            return Token.from_type(TokenType.EQUALS, self.pos - 1)
        else:
            return self.__unquoted_string_or_constraint_type()

    def __unquoted_string_or_constraint_type(self) -> Token:
        """Read an unquoted literal; if followed by ":" it names a ConstraintType."""
        out = ""

        start = self.pos

        while self.current_char not in self.NOT_IN_ULITERAL and self.current_char is not None:
            out += self.current_char
            self.__advance()

        end = self.pos - 1

        if self.current_char == ":":
            if len(out) == 0:
                raise ParsingError(self.pos, self.pos)
            self.__advance()
            constraint_type = ConstraintType.from_string(out)
            if constraint_type is None:
                # BUG FIX: message previously misspelled "ContraintType".
                raise ParsingError(start, end, f"Invalid ConstraintType \"{out}\"")
            return Token(TokenType.CONSTRAINTTYPE, constraint_type, start, end)
        else:
            return Token(TokenType.ULITERAL, out, start, end)

    def __quoted_string(self) -> Token:
        """Read a quoted literal, honoring backslash escapes for ESCAPABLE_CHARS."""
        start = self.pos
        quote = self.current_char
        self.__advance()
        escape = False
        out = ""

        while escape or self.current_char != quote:
            # BUG FIX: an unterminated literal previously walked off the end of
            # the input and crashed with a TypeError ("out += None"); report a
            # proper parse error instead.
            if self.current_char is None:
                raise ParsingError(start, self.pos, "Unterminated quoted literal")
            if escape:
                escape = False
                if self.current_char not in Tokenizer.ESCAPABLE_CHARS:
                    # Unknown escape sequence: keep the backslash literally.
                    out += "\\"
                out += self.current_char
                self.__advance()
                continue
            if self.current_char == "\\":
                escape = True
            else:
                out += self.current_char
            self.__advance()
        end = self.pos
        self.__advance()
        return Token(TokenType.QLITERAL, out, start, end)

    def __advance(self) -> None:
        """Move to the next character; current_char becomes None at end of input."""
        if self.pos < len(self.text) - 1:
            self.pos += 1
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

    def __skip_whitespace(self) -> None:
        # BUG FIX: trailing whitespace previously raised AttributeError
        # (None.isspace()) once __advance() exhausted the input mid-loop.
        while self.current_char is not None and self.current_char.isspace():
            self.__advance()
|
||||
|
||||
if __name__ == "__main__":  # TODO remove
    # Ad-hoc manual smoke test: dump every token of a sample query.
    tokenizer = Tokenizer("Mario AND Luigi tag:test[parent=Color,color=red]")
    token = Token(None, None)
    while token.type != TokenType.EOF:
        token = tokenizer.get_next_token()
        print(token)  # noqa: T201
|
||||
15
tagstudio/src/core/query_lang/util.py
Normal file
15
tagstudio/src/core/query_lang/util.py
Normal file
@@ -0,0 +1,15 @@
|
||||
class ParsingError(Exception):
    """Error raised when query text cannot be tokenized or parsed.

    Carries the inclusive [start, end] character span of the offending input.

    BUG FIX: previously derived from BaseException, which Python reserves for
    system-exiting exceptions (SystemExit, KeyboardInterrupt); generic
    ``except Exception`` boundaries would silently miss it.
    """

    start: int
    end: int
    msg: str

    def __init__(self, start: int, end: int, msg: str = "Syntax Error") -> None:
        # Forward the message so Exception machinery (args, default str) works too.
        super().__init__(msg)
        self.start = start
        self.end = end
        self.msg = msg

    def __str__(self) -> str:
        return f"Syntax Error {self.start}->{self.end}: {self.msg}"

    def __repr__(self) -> str:
        return self.__str__()
|
||||
Reference in New Issue
Block a user