最近在尝试实现CommonLisp,想起CL的常用操作符car, cdr, defparameter之类,如果把它们改短些,输入时不就省事么?不过动手前先找准目标比较妥当,于是写了个Python脚本,统计ClozureCL、CLISP中出现的所有标识符,挑出排名靠前的看看。
# 统计CommonLisp实现中,最常用的标识符及其出现的频率
import string, os, sys, re;
def stat_file(file_path, dic):
    """Count identifier occurrences in one source file.

    Every token matched by the identifier pattern is tallied into ``dic``
    (token -> running count). The mapping is updated in place so a single
    dictionary can accumulate totals across many files.

    Args:
        file_path: Path of the text file to scan; it is opened with the
            platform default encoding.
        dic: Mutable mapping from identifier to count, updated in place.

    Returns:
        The length of the longest identifier found in this file, or 0 when
        the file holds no identifiers or its contents could not be read.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Raw string: "\w" inside a plain literal is an invalid escape sequence.
    # A token starts with word characters and may continue with '-', so
    # hyphenated Lisp names such as ``multiple-value-bind`` stay in one piece.
    pattern = re.compile(r"\w+[a-zA-Z0-9-_]*")

    # ``with`` guarantees the handle is closed even when reading fails.
    with open(file_path) as fp:
        try:
            lines = fp.readlines()
        except (OSError, UnicodeError):
            # Best effort: an unreadable or undecodable file contributes
            # nothing instead of aborting the whole scan.
            return 0

    max_id_len = 0
    for line in lines:
        for ident in pattern.findall(line):
            max_id_len = max(max_id_len, len(ident))
            dic[ident] = dic.get(ident, 0) + 1
    return max_id_len
# Root directories of the Lisp implementations to scan. Raw strings keep the
# Windows backslashes literal instead of relying on unrecognised escapes.
base_dirs = [r"C:\wz\greenware\ccl-1.7", r"C:\wz\greenware\clisp-2.49"]
dic_id_cnt = {}  # identifier -> total occurrences across all scanned files
max_id_len = 0  # length of the longest identifier seen in any file
for base_dir in base_dirs:
    for root, dirs, files in os.walk(base_dir):
        # Subversion metadata lives in ".svn" directories. Pruning the list
        # in place stops os.walk from descending into them; testing the file
        # name alone never excluded files stored under such a directory.
        dirs[:] = [d for d in dirs if d != ".svn"]
        for file_name in files:
            # Only Lisp sources; names containing ".svn" are still skipped.
            if not file_name.endswith(".lisp") or ".svn" in file_name:
                continue
            file_path = os.path.join(root, file_name)
            print(file_path)
            max_id_len = max(max_id_len, stat_file(file_path, dic_id_cnt))
ml = max_id_len + 1  # width that would fit any identifier; not used for output
# Most frequent first; identifiers with equal counts keep first-seen order.
tuples = sorted(dic_id_cnt.items(), key=lambda item: item[1], reverse=True)
top_n = 100
# The longest identifier was measured, but the leading entries are all short,
# so a fixed 20-column name field is enough. The slice prints exactly top_n
# rows (the earlier counter-based loop let one extra row through).
for name, count in tuples[:top_n]:
    print(name.ljust(20) + str(count).rjust(8))
结果和空想的完全不同——七个核心操作符并不是最热的(如果贸然改解析器就亏大了):
the 25622
if 17399
let 15485
nil 14565
and 12769
0 12449
defun 11855
a 11716
t 10312
is 9790
1 9649
of 9405
to 9294
when 8918
or 7997
setf 7791
setq 7688
name 7195
s 6686
in 6640
i 6408
declare 5964
eq 5714
not 5586
x 5573
type 5558
fixnum 5395
form 5379
seg 4996
stream 4905
list 4836
for 4668
string 4334
unless 4329
dest 4043
car 4009
value 3975
imm0 3949
n 3657
2 3598
p 3594
l 3582
target 3580
buffer 3478
u 3421
be 3409
that 3403
cdr 3395
class 3301
start 3288
as 3249
arg_z 3245
S 3155
end 3130
ppc 3093
args 2991
values 2970
key 2948
it 2947
length 2783
return 2777
with 2753
src 2745
vreg 2736
logior 2722
null 2701
function 2678
error 2650
index 2639
The 2603
object 2578
val 2566
file 2522
code 2507
do 2500
arm 2475
4 2468
this 2407
eql 2401
rest 2397
ash 2394
optional 2366
progn 2360
format 2343
y 2342
x8664 2339
b 2310
cond 2277
8 2274
body 2243
apply 2235
line 2221
vector 2218
result 2209
lambda 2199
are 2198
3 2197
idx 2188
lisp 2161
defconstant 2128
x8632 2121