LR(1) 语法分析 (Python)

python

内容

  • 生成 LR(1) 规范项集族
  • 构造 LR(1) 分析表
  • 自动机和分析表可视化
  • 基于栈的自顶向上分析器
  • 二义性文法处理(本程序中遇冲突直接覆写,可自由调整)

python"># 项:(prod_id, dot_pos, ahead)

import copy

def closure(prods, ps, first):

ps = ps[:]

for (prod_id, dot_pos, ahead) in ps:

prod = prods[prod_id]

if dot_pos + 1 < len(prod):

cur = prod[dot_pos+1]

# next_symbol = prod[dot_pos+2] if dot_pos+2 < len(prod) else ahead

nss = []

tmpseq = prod[dot_pos+2:] + [ahead]

i = 0

first['$'] = ['$']

while i < len(tmpseq):

for x in first[tmpseq[i]]:

if (x != '$' or tmpseq[i] == '$') and x not in nss:

nss.append(x)

if '$' not in first[tmpseq[i]]:

break

i += 1

for next_symbol in nss:

for ex_prod_id, ex_prod in enumerate(prods):

if ex_prod[0] == cur:

new_item = (ex_prod_id, 0, next_symbol)

if new_item not in ps:

ps.append(new_item)

return ps

def goto(prods, ps, a, first):

ans = []

for (i, j, ah) in ps:

prod = prods[i]

if j + 1 < len(prod):

cur = prod[j + 1]

if cur == a:

new_item = (i, j+1, ah)

if new_item not in ans:

ans.append(new_item)

return closure(prods, ans, first)

def prod2str(prod, dotpos):

prod = prod[:]

prod.append("")

prod[dotpos + 1] = '·' + prod[dotpos + 1]

return prod[0] + '→' + ''.join(prod[1:])

def make_lr1(prods):

table = [{}]

words = list(set(j for i in prods for j in i))

nonterminals = list(set(i[0] for i in prods))

terminals = [i for i in words if i not in nonterminals]

terminals .append('$')

nonterminals.sort()

terminals.sort()

# 预处理 first 集合

first = {}

for i in terminals:

first[i] = [i]

for i in nonterminals:

first[i] = []

while True:

old_first = copy.deepcopy(first)

for prod in prods:

left = prod[0]

right = prod[1:]

i = 0

while i < len(right):

for x in first[right[i]]:

if x != "$" and x not in first[left]:

first[left].append(x)

if "$" not in first[right[i]]:

break

i += 1

if i == len(right):

if "$" not in first[left]:

first[left].append("$")

if old_first == first:

break

I0 = closure(prods, [(0, 0, '$')], first)

cc = [I0]

i = 0

while i < len(cc):

ccc = {}

for j in cc[i]:

if (j[0], j[1]) not in ccc.keys():

ccc[(j[0], j[1])] = j[2]

else:

ccc[(j[0], j[1])] = j[2]

ccc = [[i[0], i[1], j] for i, j in ccc.items()]

print("\033[35m[I%d]:\n \033[31m" % i, '\n \033[31m'.join(prod2str(prods[j[0]], j[1])+' \t\033[33m'+j[2]+'\033[30m'

for j in ccc), end="\n \033[32m")

candiwords = []

for j in cc[i]:

prod = prods[j[0]]

dot_pos = j[1]+1

if dot_pos < len(prod) and prod[dot_pos] not in candiwords:

candiwords.append(prod[dot_pos])

if dot_pos == len(prod):

if prod[0] != "S'":

if j[2] not in table[i].keys():

table[i][j[2]] = 'r%d' % j[0]

else:

table[i][j[2]] = 'r%d' % j[0]

else:

if '$' not in table[i].keys():

table[i]['$'] = 'acc'

else:

table[i]['$'] += 'acc'

for j in candiwords:

tmp = goto(prods, cc[i], j, first)

if len(tmp) > 0 and tmp not in cc:

cc.append(tmp)

table.append({})

if len(tmp) > 0:

print(j, cc.index(tmp), sep=",", end=" ")

if j in terminals:

if j not in table[i].keys():

table[i][j] = "s%d" % cc.index(tmp)

else:

table[i][j] = "s%d" % cc.index(tmp)

else:

table[i][j] = "%d" % cc.index(tmp)

i += 1

print("\033[0m")

head = ['I']

for j in terminals:

head.append(j)

for j in nonterminals:

if j == "S'":

continue

head.append(j)

print('---'.join(''+'-'*(5) for i in head))

print(' | '.join(i+' '*(5-len(i)) for i in head))

print('-+-'.join(''+'-'*(5) for i in head))

for i, line in enumerate(table):

lo = [str(i)]

for j in terminals:

if j in line.keys():

lo.append(table[i][j])

else:

lo.append("")

for j in nonterminals:

if j == "S'":

continue

if j in line.keys():

lo.append(table[i][j])

else:

lo.append("")

print(' | '.join(i+' '*(5-len(i)) for i in lo))

return table

prod_file = open("prod.txt", "r")

prod_lines = prod_file.readlines()

prod_lines = [i[:-1] for i in prod_lines]

prod_lines = [i[:-1] if i[-1] == ' ' else i for i in prod_lines]

prods = [i.split(" ")[:1] + i.split(" ")[2:] for i in prod_lines]

print(prods)

table = make_lr1(prods)

tokens = []

token_file = open("tokens.txt", "r")

token_lines = token_file.readlines()

token_lines = [i[:-1] for i in token_lines]

token_lines = [i[:-1] if i[-1] == ' ' else i for i in token_lines]

token_lines = [i.split(' ') for i in token_lines]

tokens = [i[1] for i in token_lines]

tokens_str = [i[2] for i in token_lines]

tokens.append("$")

tokens_str.append("")

stack_token = ['#']

stack_state = [0]

input_ptr = 0

while len(stack_token) > 0:

cur_token = tokens[input_ptr]

cur_state = stack_state[-1]

if cur_token not in table[cur_state].keys():

print("errorA")

break

action = table[cur_state][cur_token]

if action == "acc":

print("succeed")

break

elif action[0] == 's':

next_state = int(action[1:])

stack_token.append(cur_token)

stack_state.append(next_state)

input_ptr += 1

print("shift", cur_token)

else:

prod_id = int(action[1:])

prod_len = len(prods[prod_id]) - 1

print("reduce", prods[prod_id])

stack_token = stack_token[:-prod_len]

stack_state = stack_state[:-prod_len]

cur_state = stack_state[-1]

goto_nt = prods[prod_id][0]

if goto_nt not in table[cur_state].keys():

print("errorB")

break

next_state = int(table[cur_state][goto_nt])

stack_token.append(goto_nt)

stack_state.append(next_state)

文法示例

S' -> L 

L -> L S

L -> S

S -> S0 ;

S -> { L }

S0 -> Sa

Sa -> Id = E

S -> IfC S

S -> IfCEl S

IfCEl -> IfC S else

IfC -> if ( C )

C -> C1

C -> Ca C1

Ca -> C &&

C1 -> C2

C1 -> Co C2

Co -> C1 ||

C2 -> C3

C2 -> ! C3

C3 -> ( C )

C -> E Rop E

Rop -> ==

Rop -> !=

Rop -> <

Rop -> <=

Rop -> >

Rop -> >=

E -> E + Et

E -> E - Et

E -> Et

Et -> Et * Ef

Et -> Et / Ef

Et -> Et % Ef

Et -> Ef

Ef -> ( E )

Ef -> Id

E -> Const

项集族输出示例

分析表输出示例

分析输入示例

if (1 == 2) if (3 == 4) a = 1; else a = 2; else a = 3;

分析输出示例

以上是 LR(1) 语法分析 (Python) 的全部内容, 来源链接: utcz.com/z/386872.html

回到顶部