首先是测试文本,为了有效验证结果是否正确,使用'new.txt'文件如下
I do love you.
Do you love me?
Yes, I do.
(注意:最后一句写完之后也要回车,保证末尾存在换行符 \n,否则统计的行数会少一行)
程序:
# wordstats.py
# Characters that survive normalization: the lowercase ASCII letters, space,
# hyphen, apostrophe and the newline character. Newline must be kept so that
# words on adjacent lines stay separated when the text is later split on
# whitespace.
keep = {
    'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u',
    'v', 'w', 'x', 'y', 'z', ' ', '-', "'", '\n',
}
def normalize(s):
    """Return a lowercased copy of ``s`` without the characters not in ``keep``.

    Args:
        s: The text to normalize.

    Returns:
        A new string made of the characters of ``s.lower()`` that are
        members of the module-level ``keep`` set, in their original order.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return ''.join(c for c in s.lower() if c in keep)
def make_freq_dict(s):
    """Count how often each word occurs in ``s``.

    The text is first passed through ``normalize`` and then split on
    whitespace; each resulting token counts as one word.

    Args:
        s: The text to analyse.

    Returns:
        A dict mapping each distinct word to its number of occurrences.
    """
    d = {}
    for w in normalize(s).split():
        d[w] = d.get(w, 0) + 1
    return d
def print_file_stats(fname):
    """Print character, line and word counts plus the ten most common words.

    Args:
        fname: Path of the text file to analyse.

    The line count is the number of newline characters in the file, so a
    final line without a trailing newline is not counted.
    """
    # The context manager closes the file even if reading fails.
    with open(fname, 'r') as f:
        s = f.read()

    num_chars = len(s)
    num_lines = s.count('\n')

    d = make_freq_dict(s)
    num_words = sum(d.values())

    # (count, word) tuples sorted in descending order: highest count first,
    # ties broken by reverse alphabetical order of the word.
    lst = sorted(((d[w], w) for w in d), reverse=True)

    print("The file '%s' has: " % fname)
    print(' ', str(num_chars), ' characters')
    print(' ', str(num_lines), ' lines')
    print(' ', str(num_words), ' words')

    print("\nThe top 10 most frequent words are:")
    for i, (count, word) in enumerate(lst[:10], 1):
        print('No.' + str(i), count, word)
IDLE shell结果
>>> print_file_stats('new.txt')
The file 'new.txt' has:
42 characters
3 lines
11 words
The top 10 most frequent words are:
No.1 3 do
No.2 2 you
No.3 2 love
No.4 2 i
No.5 1 yes
No.6 1 me