0%

LL(1)分析法中求first集合的Python实现

源程序

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def First(string, G):
string_firstset = set()
i = 0
while True:
if i >= len(string):
string_firstset.add('~')
return string_firstset
elif (string[i] >= 'a' and string[i] <= 'z') or string[i] == '+' or string[i] == '*' or string[i] == '~' or string[i] == '(' or string[i] == ')': # 如果是终结符, 直接返回集合
string_firstset.add(string[i])
return string_firstset
else: # 如果是非终结符
# 表示E
if i+1 < len(string) and string[i+1] == "'":
E = string[i] + string[i+1]
else:
E = string[i]

# 找到E所在的那一行
line = 0
while G[line][0] != E:
line += 1

# 求E的first集合
E_firstset = set()

column = 1
while G[line][column] != '':
E_firstset = E_firstset.union(First(G[line][column], G))
column += 1

# ~是否在E的first集合中
if '~' in E_firstset:
if len(E) == 1:
i += 1
else:
i += 2
# string_firtsset集合增加
E_firstset.remove('~')
string_firstset = string_firstset.union(E_firstset)
else:
string_firstset = string_firstset.union(E_firstset)
return string_firstset

# 文法的数据化结构
G = [['']*10 for i in range(10)]
line = 0

# 打开放有文法的文件
f = open('G.txt', 'r')
lines = f.readlines()
f.close()

for g in lines:
print(g, end='')
i = 0
column = 0
while True:
while g[i].isspace() or g[i] == '-' or g[i] == '>' or g[i] == '|': # 去掉空白和无关紧要的符号
i += 1
if i >= len(g):
break
if i>= len(g):
break
while g[i].isalpha() or g[i] == "'" or g[i] == '+' or g[i] == '*' or g[i] =='(' or g[i] == ')' or g[i] == '~': # 获取各字符串
G[line][column] += g[i]
i += 1
if i >= len(g):
break
column += 1
if i>= len(g):
break

line += 1

# for i in range(10): # 打印效果
# print(G[i])

# 打印各非终结符的first集合
print('First(C) = ', end = '')
print(First('C', G))
print("First(B') = ", end = '')
print(First("B'", G))
print('First(B) = ', end = '')
print(First('B', G))
print('First(A) = ', end = '')
print(First('A', G))
print('First(S) = ', end = '')
print(First('S', G))

G.txt
放文法的文件
其中~代表\(\varepsilon\)

1
2
3
4
5
S  -> AB
A -> Ca | ~
B -> cB'
B' -> aACB' | ~
C -> b | ~
Thank you for your reward !