python数据结构的处理

元组占用的存储空间小、访问速度快；但在元组中用数字索引去访问大量数据时，代码的可读性较低。

1
2
3
4
5
# 可以定义一系列的数值常量去索引
NAME, AGE, SEX = range(3)
student = ('rose', 22, 'male')

print(student[AGE])
22
1
# 使用标准库中collections.namedtuple代替内置tuple
1
2
3
4
5
6
from collections import namedtuple

# 创建一个类 第二个参数内的部分可以作为属性访问
Student = namedtuple('Student', ['name', 'age', 'sex'])
s = Student('rose', 23, 'male')
s
Student(name='rose', age=23, sex='male')
1
s.name
'rose'
1
2
# Student是内置元组tuple的子类
issubclass(Student, tuple)
True

统计相关数据以及频率

1
2
3
from random import randint
data = [randint(1, 20) for _ in range(20)]
c = dict.fromkeys(data, 0)
1
c
{1: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 11: 0, 13: 0, 15: 0, 16: 0, 18: 0, 19: 0}
1
2
for x in data:
    c[x] += 1
1
c
{1: 2, 3: 2, 4: 1, 5: 1, 6: 1, 7: 1, 11: 1, 13: 3, 15: 3, 16: 2, 18: 1, 19: 2}
1
2
# 当然还有更好的方法
data
[15, 11, 18, 16, 19, 19, 15, 15, 16, 13, 5, 3, 1, 13, 13, 4, 7, 6, 1, 3]
使用collections中的Counter
1
from collections import Counter
1
c2 = Counter(data)
1
2
# 实例即可统计频率
c2
Counter({1: 2,
         3: 2,
         4: 1,
         5: 1,
         6: 1,
         7: 1,
         11: 1,
         13: 3,
         15: 3,
         16: 2,
         18: 1,
         19: 2})
1
c2[1]
2
1
c2.most_common()
[(15, 3),
 (13, 3),
 (16, 2),
 (19, 2),
 (3, 2),
 (1, 2),
 (11, 1),
 (18, 1),
 (5, 1),
 (4, 1),
 (7, 1),
 (6, 1)]
1
c2.most_common(3)
[(15, 3), (13, 3), (16, 2)]
1
# 由此可知 most_common方法返回按出现次数从高到低排列的(元素, 次数)列表 传入参数n时只返回前n项

根据字典值去排序

1
2
from random import randint
d = {x: randint(60, 99) for x in 'xyzabc'}
1
d.keys()
dict_keys(['x', 'y', 'z', 'a', 'b', 'c'])
1
d.values()
dict_values([96, 81, 79, 87, 85, 86])
1
new_list = list(zip(d.values(), d.keys()))
1
new_list
[(96, 'x'), (81, 'y'), (79, 'z'), (87, 'a'), (85, 'b'), (86, 'c')]
1
sorted(new_list)
[(79, 'z'), (81, 'y'), (85, 'b'), (86, 'c'), (87, 'a'), (96, 'x')]

同时可以利用sorted函数 传入key参数

1
d.items()
dict_items([('x', 96), ('y', 81), ('z', 79), ('a', 87), ('b', 85), ('c', 86)])
1
sorted(d.items(), key=lambda x: x[1])
[('z', 79), ('y', 81), ('b', 85), ('c', 86), ('a', 87), ('x', 96)]
1
2
# 上述第一个参数是一个可迭代的数据 key是对每一个迭代出的元素进行处理的lambda函数 这里取出的是x[1]
# 即按每个(键, 值)元组的第二个元素(字典的值)进行排序 同样实现了上述排序的需求

公共键问题

1
2
3
4
5
from random import randint, sample

# sample方法随机抽取序列中的某些元素

sample('abcdefg', 3)
['f', 'a', 'g']
1
s1 = {x: randint(1, 4) for x in sample('abcdefg', randint(3, 6))}
1
s2 = {x: randint(1, 4) for x in sample('abcdefg', randint(3, 6))}
1
s3 = {x: randint(1, 4) for x in sample('abcdefg', randint(3, 6))}
1
s1
{'a': 3, 'c': 3, 'g': 1}
1
s2
{'a': 1, 'b': 4, 'c': 4, 'd': 3, 'e': 3, 'f': 2}
1
s3
{'c': 2, 'd': 1, 'e': 1, 'f': 1}
1
# 得到了某个元素每一轮出现的次数

为了得到公共键

1
# 字典的.keys()方法 得到类似集合的键视图 可以直接使用集合运算 交(&)、并(|)、差(-)
1
s1.keys() & s2.keys() & s3.keys()
{'c'}
1
2
3
# 当字典的数量较多时 可以用map和reduce依次对各字典的键求交集
# py3中的reduce需要从functools导入
from functools import reduce
1
reduce(lambda a,b: a & b, map(dict.keys, [s1, s2, s3]))
{'c'}

字典有序问题

1
2
3
4
5
6
7
# 使用OrderedDict

from collections import OrderedDict
d = OrderedDict()
d['a'] = 1
d['b'] = 2
d['c'] = 3
1
2
3
for x in d:
    print(x)
d
a
b
c



OrderedDict([('a', 1), ('b', 2), ('c', 3)])