本来想用 Python 读取源文件,并通过正则表达式进行过滤。但过程中文件编码的问题一直没能解决,于是先写了一个简易版本:过滤出日志文件中感兴趣的行,然后将结果输出到另一个文件中。
功能:1、输入源文件路径;2、输入保存结果的目录路径(输出文件以当前时间戳命名,扩展名为 .log);3、逐个输入希望过滤的词组,输入 :q 结束;一行只要包含这些词组中的任意一个,即视作匹配。
源码如下:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""readTextFile.py--read and display text file

Interactive log filter.  The script asks for:

1. the source file to read,
2. an existing directory to write the result into,
3. one or more words to match (enter ``:q`` to finish).

Every source line containing at least one of the words is copied, in
original order, to ``<directory>/<YYYYmmddHHMMSS>.log``.  If nothing
matches, no output file is created.
"""
import os
import time

try:
    read_input = raw_input  # Python 2
except NameError:
    read_input = input  # Python 3: input() already returns the raw string


def ask_source_file():
    """Prompt until the user names an existing regular file; return its path."""
    while True:
        fsrcname = read_input('Please input the file to read:')
        # isfile rather than exists: a directory passes exists() but cannot
        # be opened for reading afterwards.
        if os.path.isfile(fsrcname):
            print("success : '%s' is exists." % fsrcname)
            return fsrcname
        print("Error : '%s' isn't exists." % fsrcname)


def ask_dest_dir():
    """Prompt until the user names an existing directory; return its path."""
    while True:
        fdestpath = read_input('Please input the dest file path to output:')
        if os.path.isdir(fdestpath):
            print("success : '%s' is ok." % fdestpath)
            return fdestpath
        print("Error : '%s' isn't exists." % fdestpath)


def ask_filter_words():
    """Collect non-empty words to match until the user enters ``:q``."""
    words = []
    while True:
        restr = read_input('Please input the word to match for the lines in log,input :q to quit:')
        if restr == ':q':
            print("\r\n matching ...,please wait")
            return words
        if restr == '':
            print("Please input valid word to match")
        else:
            words.append(restr)


def filter_lines(lines, words):
    """Return the lines that contain at least one of *words*.

    *lines* may be any iterable of strings (e.g. an open file); order is
    preserved and a line matching several words is returned only once.
    An empty *words* list matches nothing.
    """
    return [line for line in lines if any(word in line for word in words)]


def build_dest_path(dest_dir, filename):
    """Join *dest_dir* and *filename* using the platform's path separator."""
    # os.path.join inserts a separator only when dest_dir lacks a trailing
    # one, and does not hard-code the Windows backslash.
    return os.path.join(dest_dir, filename)


def main():
    """Run the interactive prompt / filter / write sequence."""
    fsrcname = ask_source_file()
    fdestpath = ask_dest_dir()
    filterstr = ask_filter_words()

    try:
        # Stream the file; only matching lines are kept in memory.
        with open(fsrcname, 'r') as fobj:
            matched = filter_lines(fobj, filterstr)
    except (IOError, OSError, UnicodeError):
        print("*** file open error")
        # Nothing was read, so there is nothing meaningful to write.
        return

    if not matched:
        print("filter file fail, no match ")
        return

    # Output file is named after the current local time, e.g. 20160320114539.log
    destfilename = time.strftime("%Y%m%d%H%M%S", time.localtime()) + '.log'
    fdestpath = build_dest_path(fdestpath, destfilename)
    print("filter file complete, now the output the result to the '%s' " % fdestpath)
    try:
        with open(fdestpath, 'w') as fobj:
            fobj.writelines(matched)
    except (IOError, OSError):
        print("*** file open error")
    else:
        print("file output success, filepath is '%s' " % fdestpath)


if __name__ == '__main__':
    main()