diff --git a/tagstudio/src/core/query_lang/ast.py b/tagstudio/src/core/query_lang/ast.py index a68feb91..d402b65f 100644 --- a/tagstudio/src/core/query_lang/ast.py +++ b/tagstudio/src/core/query_lang/ast.py @@ -8,10 +8,10 @@ class ConstraintType(Enum): @staticmethod def from_string(text: str) -> "ConstraintType": - return { - "tag": ConstraintType.Tag, - "mediatype": ConstraintType.MediaType - }.get(text.lower(), None) + return {"tag": ConstraintType.Tag, "mediatype": ConstraintType.MediaType}.get( + text.lower(), None + ) + class AST: def __str__(self): @@ -19,10 +19,11 @@ class AST: fields = vars(self) # Get all instance variables as a dictionary field_str = ", ".join(f"{key}={value}" for key, value in fields.items()) return f"{class_name}({field_str})" - + def __repr__(self) -> str: return self.__str__() + class ANDList(AST): elements: list[Union["ORList", "Constraint"]] @@ -30,6 +31,7 @@ class ANDList(AST): super().__init__() self.elements = elements + class ORList(AST): terms: list[ANDList] @@ -37,6 +39,7 @@ class ORList(AST): super().__init__() self.terms = terms + class Constraint(AST): type: ConstraintType value: str @@ -48,6 +51,7 @@ class Constraint(AST): self.value = value self.properties = properties + class Property(AST): key: str value: str @@ -55,4 +59,4 @@ class Property(AST): def __init__(self, key: str, value: str) -> None: super().__init__() self.key = key - self.value = value \ No newline at end of file + self.value = value diff --git a/tagstudio/src/core/query_lang/parser.py b/tagstudio/src/core/query_lang/parser.py index c02f6b5d..08533f47 100644 --- a/tagstudio/src/core/query_lang/parser.py +++ b/tagstudio/src/core/query_lang/parser.py @@ -16,7 +16,7 @@ class Parser: self.text = text self.tokenizer = Tokenizer(self.text) self.next_token = self.tokenizer.get_next_token() - + def parse(self) -> AST: out = self.__or_list() if self.next_token.type != TokenType.EOF: @@ -24,24 +24,24 @@ class Parser: return out def __and_list(self) -> ANDList: - elements = [ self.__term() ] + elements = [self.__term()] while self.next_token.type != TokenType.EOF and not self.__is_next_or(): self.__skip_and() elements.append(self.__term()) return ANDList(elements) - + def __skip_and(self) -> None: if self.__is_next_and(): self.__eat(TokenType.ULITERAL) if self.__is_next_and(): raise self.__syntax_error("Unexpected AND") - + def __is_next_and(self) -> bool: return self.next_token.type == TokenType.ULITERAL and self.next_token.value.upper() == "AND" - + def __or_list(self) -> ORList: - terms = [ self.__and_list() ] + terms = [self.__and_list()] while self.__is_next_or(): self.__eat(TokenType.ULITERAL) @@ -51,7 +51,7 @@ class Parser: def __is_next_or(self) -> bool: return self.next_token.type == TokenType.ULITERAL and self.next_token.value.upper() == "OR" - + def __term(self) -> Union["ORList", "Constraint"]: if self.next_token.type == TokenType.RBRACKETO: self.__eat(TokenType.RBRACKETO) @@ -60,18 +60,18 @@ class Parser: return out else: return self.__constraint() - + def __constraint(self) -> Constraint: if self.next_token.type == TokenType.CONSTRAINTTYPE: self.last_constraint_type = self.__eat(TokenType.CONSTRAINTTYPE).value - + value = self.__literal() properties = [] if self.next_token.type == TokenType.SBRACKETO: self.__eat(TokenType.SBRACKETO) properties.append(self.__property()) - + while self.next_token.type == TokenType.COMMA: self.__eat(TokenType.COMMA) properties.append(self.__property()) @@ -79,13 +79,13 @@ class Parser: self.__eat(TokenType.SBRACKETC) return Constraint(self.last_constraint_type, value, properties) - + def __property(self) -> Property: key = self.__eat(TokenType.ULITERAL).value self.__eat(TokenType.EQUALS) value = self.__literal() return Property(key, value) - + def __literal(self) -> str: if self.next_token.type in [TokenType.QLITERAL, TokenType.ULITERAL]: return self.__eat(self.next_token.type).value @@ -101,7 +101,8 @@ class Parser: def __syntax_error(self, msg: str = "Syntax Error") -> ParsingError: return ParsingError(self.next_token.start, self.next_token.end, msg) -if __name__ == "__main__": #TODO remove + +if __name__ == "__main__": # TODO remove print("") # noqa: T201 p = Parser("Mario AND Luigi tag:test[parent=Color,color=red] OR mediatype:test") print(p.parse()) # noqa: T201 diff --git a/tagstudio/src/core/query_lang/tokenizer.py b/tagstudio/src/core/query_lang/tokenizer.py index c632d87c..16523d7b 100644 --- a/tagstudio/src/core/query_lang/tokenizer.py +++ b/tagstudio/src/core/query_lang/tokenizer.py @@ -6,18 +6,19 @@ from src.core.query_lang.util import ParsingError class TokenType(Enum): - EOF = -1 - QLITERAL = 0 # Quoted Literal - ULITERAL = 1 # Unquoted Literal (does not contain ":", " ", "[", "]", "(", ")", "=", ",") - RBRACKETO = 2 # Round Bracket Open - RBRACKETC = 3 # Round Bracket Close - SBRACKETO = 4 # Square Bracket Open - SBRACKETC = 5 # Square Bracket Close + EOF = -1 + QLITERAL = 0 # Quoted Literal + ULITERAL = 1 # Unquoted Literal (does not contain ":", " ", "[", "]", "(", ")", "=", ",") + RBRACKETO = 2 # Round Bracket Open + RBRACKETC = 3 # Round Bracket Close + SBRACKETO = 4 # Square Bracket Open + SBRACKETC = 5 # Square Bracket Close CONSTRAINTTYPE = 6 - COLON = 10 - COMMA = 11 + COLON = 10 + COMMA = 11 EQUALS = 12 + class Token: type: TokenType value: Any @@ -30,7 +31,7 @@ class Token: self.value = value self.start = start self.end = end - + @staticmethod def from_type(type: TokenType, pos: int = None) -> "Token": return Token(type, None, pos, pos) @@ -38,13 +39,14 @@ class Token: @staticmethod def EOF() -> "Token": # noqa: N802 return Token.from_type(TokenType.EOF) - + def __str__(self) -> str: return f"Token({self.type}, {self.value}, {self.start}, {self.end})" - + def __repr__(self) -> str: return self.__str__() + class Tokenizer: text: str pos: int @@ -85,7 +87,7 @@ class Tokenizer: return Token.from_type(TokenType.EQUALS, self.pos - 1) else: return self.__unquoted_string_or_constraint_type() - + def __unquoted_string_or_constraint_type(self) -> Token: out = "" @@ -96,14 +98,14 @@ class Tokenizer: self.__advance() end = self.pos - 1 - + if self.current_char == ":": if len(out) == 0: raise ParsingError(self.pos, self.pos) self.__advance() constraint_type = ConstraintType.from_string(out) if constraint_type is None: - raise ParsingError(start, end, f"Invalid ContraintType \"{out}\"") + raise ParsingError(start, end, f'Invalid ContraintType "{out}"') return Token(TokenType.CONSTRAINTTYPE, constraint_type, start, end) else: return Token(TokenType.ULITERAL, out, start, end) @@ -139,14 +141,15 @@ class Tokenizer: self.current_char = self.text[self.pos] else: self.current_char = None - + def __skip_whitespace(self) -> None: if self.current_char is None: return while self.current_char.isspace(): self.__advance() -if __name__ == "__main__": #TODO remove + +if __name__ == "__main__": # TODO remove t = Tokenizer("Mario AND Luigi tag:test[parent=Color,color=red]") last = Token(None, None) while last.type != TokenType.EOF: diff --git a/tagstudio/src/core/query_lang/util.py b/tagstudio/src/core/query_lang/util.py index 93885e89..80aed712 100644 --- a/tagstudio/src/core/query_lang/util.py +++ b/tagstudio/src/core/query_lang/util.py @@ -7,9 +7,9 @@ class ParsingError(BaseException): self.start = start self.end = end self.msg = msg - + def __str__(self) -> str: return f"Syntax Error {self.start}->{self.end}: {self.msg}" def __repr__(self) -> str: - return self.__str__() \ No newline at end of file + return self.__str__()