-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser_util.py
More file actions
84 lines (78 loc) · 1.93 KB
/
parser_util.py
File metadata and controls
84 lines (78 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
import re
from lexer import Lexer, LexerError
# Token table handed to the Lexer: each entry is (regex pattern, token type).
# NOTE(review): the operator patterns are listed ahead of the catch-all TERM
# pattern; presumably the Lexer tries rules in list order -- confirm against
# the lexer module before reordering.
RULES = [
    # Boolean operators, matched as whole upper-case words.
    (r'\bAND\b', 'AND'),
    (r'\bOR\b', 'OR'),
    (r'\bNOT\b', 'NOT'),
    # Grouping parentheses.
    (r'\(', 'LP'),
    (r'\)', 'RP'),
    # A bracketed run with no spaces, parentheses or nested brackets inside.
    (r'\[[^ \)\(\[\]]+\]', 'SEARCH_IN'),
    # Any other run of characters free of spaces, parentheses and brackets.
    (r'[^ \)\(\[\]]+', 'TERM'),
]
def normalize_quotes(query):
    """Return *query* with typographic quotes replaced by plain ASCII ones.

    Every run of double-quote characters (straight or typographic) collapses
    into one straight double quote, and curly single quotes become a straight
    apostrophe. If exactly one double quote is left afterwards it is removed,
    since it has no partner to pair with.
    """
    double_quote_run = r'[""“”‟〝〞]+'
    curly_single_quote = r'[‘’]'

    text = re.sub(double_quote_run, '"', query)
    text = re.sub(curly_single_quote, "'", text)

    # Anything other than a single stray double quote is returned untouched.
    if text.count('"') != 1:
        return text
    return text.replace('"', '')
def clear_newlines(query):
    """Return *query* with every line break replaced by a single space.

    Handles Unix (LF), Windows (CRLF) and bare CR line endings. A CRLF pair
    becomes one space rather than two, and no carriage return is left behind
    to end up embedded in a neighbouring term.
    """
    # Fold CRLF and bare CR into LF first so each line break yields one space.
    unified = query.replace("\r\n", "\n").replace("\r", "\n")
    return unified.replace("\n", " ")
def merge_terms(parse_query):
    """Join each run of adjacent TERM tokens into one space-separated TERM.

    *parse_query* is an iterable of ``(value, type)`` pairs. Tokens of any
    other type are passed through in their original position. Returns a new
    list of ``(value, type)`` tuples.
    """
    merged = []
    pending = []  # values of the TERM run currently being collected

    def flush_pending():
        # Emit the collected run (if any) as one TERM and start a new run.
        if pending:
            merged.append((" ".join(pending), "TERM"))
            del pending[:]

    for value, kind in parse_query:
        if kind == "TERM":
            pending.append(value)
            continue
        # A non-TERM token closes whatever run preceded it.
        flush_pending()
        merged.append((value, kind))

    # The input may end in the middle of a TERM run.
    flush_pending()
    return merged
def parse_strategy(search_strategy):
    """Tokenize a search strategy string into ``(value, type)`` tuples.

    The text first has its quotes normalized and its newlines replaced by
    spaces, then it is run through a Lexer built from RULES. Adjacent TERM
    tokens are merged before the list is returned.

    If the lexer raises LexerError, the error position is printed and the
    tokens collected up to that point are still merged and returned.
    """
    cleaned = normalize_quotes(search_strategy)
    cleaned = clear_newlines(cleaned)

    lexer = Lexer(RULES, skip_whitespace=True)
    lexer.input(cleaned)

    collected = []
    try:
        for token in lexer.tokens():
            pair = (token.val, token.type)
            collected.append(pair)
    except LexerError as err:
        # Best effort: report where lexing stopped and keep what was read.
        print('LexerError at position %s' % err.pos)

    return merge_terms(collected)
def get_terms(search_strategy):
    """Return the values of all TERM tokens found in *search_strategy*.

    The string is parsed with parse_strategy; tokens of every other type are
    left out. The order of the terms follows the parsed token order.
    """
    terms = []
    for value, kind in parse_strategy(search_strategy):
        if kind == "TERM":
            terms.append(value)
    return terms