样例数据
Sarah Sweeney,2002-6-17,2:58,2.58,2:39,2-25,2-55,2:54,2.18,2:55,2:55,2:22,2-21,2.22
需要将数据整理,实现人名+出生日期+成绩的输出
以往的做法是:
def sanitize(time_string):
    """Normalize one time string to the 'mins.secs' form.

    Strings that use '-' or ':' between minutes and seconds are rewritten
    with '.'; any other string is returned unchanged.

    Raises ValueError if the separator occurs more than once, because the
    split result is unpacked into exactly two parts.
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    (mins, secs) = time_string.split(splitter)
    return mins + '.' + secs


def get_coach_data(filename):
    """Read the first line of *filename* and return its comma-separated fields.

    Returns a list of strings, or None (after printing a message) when the
    file cannot be opened or read.
    """
    try:
        with open(filename) as f:
            data = f.readline()
        return data.strip().split(',')
    except IOError as ioerr:
        print('File error: ' + str(ioerr))
        return None


sarah = get_coach_data('sarah2.txt')
# get_coach_data returns None on a file error; skip the report in that case
# instead of failing on None.pop().
if sarah is not None:
    # The first two fields are the name and the date of birth; what is left
    # in the list afterwards are the times.
    (sarah_name, sarah_dob) = sarah.pop(0), sarah.pop(0)
    print(sarah_name + "'s fastest times are: " +
          str(sorted(set([sanitize(t) for t in sarah]))[0:3]))
这次加入了字典的做法
字典将数据值与键关联:
key   --> value
Name  --> "sarah sweeney"
DOB   --> "2002-6-17"
Times --> "[2:58,2.58,2:39,2-25,2-55,2:54,2.18,2:55,2:55,2:22,2-21,2.22]"
创建字典的方式可以是
cleese = {} # curly braces create an empty dictionary
也可以是
palin = dict() # calling dict() with no arguments also creates an empty dictionary
关联key和value的话是
cleese['Name'] = 'John Cleese' # one key mapped to a single string
或者
cleese['Name'] = ['John Cleese','John Cleese1','John Cleese2','John Cleese3','John Cleese4'] # one key mapped to a list
或者
cleese = {'Name':'abc','Address':'asdasdasda'} # note: a colon separates each key from its value
另外数据值与key关联后,需要访问数据值里面的某个数据项的话,可以是
cleese['Name'][-1] # look up the value stored under 'Name', then take its last item
类似多维数组使用。
代码改为
#!/usr/bin/python
# -*- coding: utf-8 -*-


def sanitize(time_string):
    """Normalize one time string to the 'mins.secs' form.

    Strings that use '-' or ':' between minutes and seconds are rewritten
    with '.'; any other string is returned unchanged.

    Raises ValueError if the separator occurs more than once, because the
    split result is unpacked into exactly two parts.
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    (mins, secs) = time_string.split(splitter)
    return mins + '.' + secs


def get_coach_data(filename):
    """Read the first line of *filename* and return its comma-separated fields.

    Returns a list of strings, or None (after printing a message) when the
    file cannot be opened or read.
    """
    try:
        with open(filename) as f:
            data = f.readline()
        return data.strip().split(',')
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None


sarah = get_coach_data('sarah2.txt')
sarah_data = {}
# get_coach_data returns None on a file error; only fill the dictionary and
# print the report when the read succeeded.
if sarah is not None:
    # Per the data layout the first field is the name, the second is the
    # date of birth and everything after that is a time, so each part is
    # stored under its own dictionary key.
    sarah_data['Name'] = sarah.pop(0)
    sarah_data['DOB'] = sarah.pop(0)
    sarah_data['Times'] = sarah
    print(sarah_data['Name'] + "'s fastest times are: " +
          str(sorted(set([sanitize(t) for t in sarah_data['Times']]))[0:3]))
字典的方法优势在于合理使用数据结构。是否知道何时使用列表而何时使用字典,这正是从好的程序员中区分出优秀程序员的一个标准。
字典其实也叫“映射”,“散列”,“关联数组”
为了更加方便的处理多个人的成绩的数据,所以将字典数据转移到函数里面去,直接通过函数生成出字典,并返回需要的数据
#!/usr/bin/python
# -*- coding: utf-8 -*-


def sanitize(time_string):
    """Normalize one time string to the 'mins.secs' form.

    Strings that use '-' or ':' between minutes and seconds are rewritten
    with '.'; any other string is returned unchanged.

    Raises ValueError if the separator occurs more than once, because the
    split result is unpacked into exactly two parts.
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    (mins, secs) = time_string.split(splitter)
    return mins + '.' + secs


def get_coach_data(filename):
    """Read one athlete record from the first line of *filename*.

    Returns a dictionary with three keys:
      'Name'  -- first comma-separated field
      'DOB'   -- second comma-separated field
      'Times' -- str() of the three smallest unique sanitized times,
                 ordered by string comparison

    Returns None (after printing a message) when the file cannot be opened
    or read.
    """
    try:
        with open(filename) as f:
            data = f.readline()
        templ = data.strip().split(',')
        # Pop the two leading fields explicitly so that the order does not
        # depend on how a dict literal evaluates its items; what remains in
        # templ afterwards are the times.
        name = templ.pop(0)
        dob = templ.pop(0)
        # This is the dictionary that bundles the record together.
        return {'Name': name,
                'DOB': dob,
                'Times': str(sorted(set([sanitize(t) for t in templ]))[0:3])}
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None


sarah = get_coach_data('sarah2.txt')
james = get_coach_data('james2.txt')
# get_coach_data returns None on a file error; skip the report in that case.
if sarah is not None:
    print(sarah['Name'] + "'s fastest times are: " + sarah['Times'])
这就是将代码和数据打包在一起。特定函数应用特定数据。
更加正规的做法是建立类。
类是面向对象oop编程模型的东西,类的概念在这里不详细描述。
类可以
1.降低复杂性
2.方便维护和扩展
python的类需要有一个self参数,这个参数是用来标识是属于哪个对象实例的
例如:
class Athlete:
    """Minimal example class with one attribute and one method."""

    def __init__(self, value=0):
        # Define the instance attribute `thing` of this class.
        self.thing = value

    def how_big(self):
        """Return len() of the stored value.

        Raises TypeError when the stored value has no length, which
        includes the default value 0.
        """
        return len(self.thing)
btw:__init__(名字前后各有两个下划线)是python类固定的初始化方法,创建对象实例时会自动调用,所以要初始化属性就必须实现它。
你写的代码                    -->  python执行的代码
d = Athlete("Holy Grail")     -->  Athlete.__init__(d, "Holy Grail")
                                   (Athlete 是类,__init__ 是方法,d 是目标标识符)
d.how_big()                   -->  Athlete.how_big(d)
代码改为:
def sanitize(time_string):
    """Normalize one time string to the 'mins.secs' form.

    Strings that use '-' or ':' between minutes and seconds are rewritten
    with '.'; any other string is returned unchanged.

    Raises ValueError if the separator occurs more than once, because the
    split result is unpacked into exactly two parts.
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    (mins, secs) = time_string.split(splitter)
    return mins + '.' + secs


class Athlete:
    """An athlete's name, date of birth and raw (unsanitized) times."""

    def __init__(self, a_name, a_dob=None, a_times=None):
        # name, dob and times are defined as attributes of the instance.
        self.name = a_name
        self.dob = a_dob
        # A None default plus a copy gives every instance its own list;
        # a literal [] default would be one list shared by all instances
        # created without times.
        self.times = [] if a_times is None else list(a_times)

    def top3(self):
        """Return the three smallest unique sanitized times.

        Times are compared as strings, not as numbers.
        """
        return sorted(set([sanitize(t) for t in self.times]))[0:3]


def get_coach_data(filename):
    """Build an Athlete from the first line of *filename*.

    The line is split on commas: field one is the name, field two the date
    of birth, the rest are the times. Returns None (after printing a
    message) when the file cannot be opened or read.
    """
    try:
        with open(filename) as f:
            data = f.readline()
        templ = data.strip().split(',')
        return Athlete(templ.pop(0), templ.pop(0), templ)
    except IOError as ioerr:
        print('File error: ' + str(ioerr))
        return None


james = get_coach_data('james2.txt')
julie = get_coach_data('julie2.txt')
mikey = get_coach_data('mikey2.txt')
sarah = get_coach_data('sarah2.txt')

for athlete in (james, julie, mikey, sarah):
    # get_coach_data returns None on a file error; skip those entries.
    if athlete is not None:
        print(athlete.name + "'s fastest times are: " + str(athlete.top3()))
科普:
1.通过在各个对象的属性中保留原始数据,可以支持类扩展来满足将来的其他需求。如果处理数据并作为对象初始化代码的一部分,说明你已对程序员将如何使用这个类做出了假设,而日后这些假设肯定会对你造成障碍。
在类里面增加一个灵活的增加成绩数据的函数
可以是增加单个成绩,或是增加多个成绩
单个成绩用add_time,传入的是字符串;多个成绩用add_times,传入的是list。以下是单个成绩的样例:
#!/usr/bin/python
# -*- coding: utf-8 -*-


class Athlete:
    """An athlete's name, date of birth and raw (unsanitized) times."""

    def __init__(self, a_name, a_dob=None, a_times=None):
        self.name = a_name
        self.dob = a_dob
        # A None default plus a copy gives every instance its own list.
        # With a literal [] default, add_time()/add_times() on one athlete
        # would also change every other athlete created without times.
        self.times = [] if a_times is None else list(a_times)

    def add_time(self, time_value):
        """Append a single time (a string) to this athlete's times."""
        self.times.append(time_value)

    def top3(self):
        """Return the smallest unique sanitized times, compared as strings.

        NOTE(review): the slice is [0:15], not [0:3] as the name suggests --
        presumably widened so that a newly added time shows up in the
        printed result; confirm before changing it.
        """
        return sorted(set([sanitize(t) for t in self.times]))[0:15]

    def add_times(self, list_of_times):
        """Append every time in *list_of_times* to this athlete's times."""
        self.times.extend(list_of_times)


def sanitize(time_string):
    """Normalize one time string to the 'mins.secs' form.

    Strings that use '-' or ':' between minutes and seconds are rewritten
    with '.'; any other string is returned unchanged.

    Raises ValueError if the separator occurs more than once, because the
    split result is unpacked into exactly two parts.
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    (mins, secs) = time_string.split(splitter)
    return mins + '.' + secs


def get_coach_data(filename):
    """Build an Athlete from the first line of *filename*.

    The line is split on commas: field one is the name, field two the date
    of birth, the rest are the times. Returns None (after printing a
    message) when the file cannot be opened or read.
    """
    try:
        with open(filename) as f:
            data = f.readline()
        templ = data.strip().split(',')
        return Athlete(templ.pop(0), templ.pop(0), templ)
    except IOError as ioerr:
        print('File error:' + str(ioerr))
        return None


sarah = get_coach_data('sarah2.txt')
# get_coach_data returns None on a file error; skip the demo in that case.
if sarah is not None:
    sarah.add_time('2.88')  # the new method is called here
    # The printed result differs from the run without the added time.
    print(sarah.name + "'s fastest times are: " + str(sarah.top3()))
观察到这个类有点像list,所以有重复制造车轮的嫌疑,并且功能单一,所以决定继承list类
def sanitize(time_string):
    """Normalize one time string to the 'mins.secs' form.

    Strings that use '-' or ':' between minutes and seconds are rewritten
    with '.'; any other string is returned unchanged.

    Raises ValueError if the separator occurs more than once, because the
    split result is unpacked into exactly two parts.
    """
    if '-' in time_string:
        splitter = '-'
    elif ':' in time_string:
        splitter = ':'
    else:
        return time_string
    (mins, secs) = time_string.split(splitter)
    return mins + '.' + secs


class AthleteList(list):  # inherits from list, so the base class is named here
    """A list of raw time strings that also carries a name and date of birth."""

    def __init__(self, a_name, a_dob=None, a_times=None):
        # Initialise the list part of *this* object; passing a fresh []
        # instead of self would initialise a throwaway list.
        list.__init__(self)
        self.name = a_name
        self.dob = a_dob
        # Because the class inherits from list, extend() is available
        # directly on self. None stands for "no times supplied".
        if a_times is not None:
            self.extend(a_times)

    def top3(self):
        """Return the three smallest unique sanitized times.

        Times are compared as strings, not as numbers.
        """
        return sorted(set([sanitize(t) for t in self]))[0:3]


def get_coach_data(filename):
    """Build an AthleteList from the first line of *filename*.

    The line is split on commas: field one is the name, field two the date
    of birth, the rest are the times. Returns None (after printing a
    message) when the file cannot be opened or read.
    """
    try:
        with open(filename) as f:
            data = f.readline()
        templ = data.strip().split(',')
        return AthleteList(templ.pop(0), templ.pop(0), templ)
    except IOError as ioerr:
        print('File error: ' + str(ioerr))
        return None


james = get_coach_data('james2.txt')
julie = get_coach_data('julie2.txt')
mikey = get_coach_data('mikey2.txt')
sarah = get_coach_data('sarah2.txt')

for athlete in (james, julie, mikey, sarah):
    # get_coach_data returns None on a file error; skip those entries.
    if athlete is not None:
        print(athlete.name + "'s fastest times are: " + str(athlete.top3()))
原文引用: