list
0 1 2 3 4 5 6 7 8 ...
┌─┬─┬─┬─┬─┬─┬─┬─┬─
│ │ │ │ │ │ │ │ │ ...
└─┴─┴─┴─┴─┴─┴─┴─┴─
┌─┬─┬─┬─┬─┬─┬─┬─┐
│ │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ ... │
Memory hierarchy (but see Gustavo Duarte’s What Your Computer Does While You Wait and Dan Luu’s How Misaligning Data Can Increase Performance 12×)
Why do I ignore those issues?
Why do I ignore those issues?
The Standard Library does
Record
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → ...
├─┼─┼─┼─┼─┼─┼─┼─┤
...
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type 'int'>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ 123 │
└─┴─┴─┴─┴─┴─┴─┴─┘
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type 'float'>
├─┼─┼─┼─┼─┼─┼─┼─┤
│2.997924580e+08│
└─┴─┴─┴─┴─┴─┴─┴─┘
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type 'str'>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ length = 6 │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ hash │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ flags │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ address = 0 │
├─┼─┼─┼─┼─┼─┼─┴─┘
│H e l l o !│
└─┴─┴─┴─┴─┴─┘
┌─┬─┬─┬─┬─┬─┬─┬─┐
a + 0 │reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
a + 8 │address of type│ → <type 'int'>
├─┼─┼─┼─┼─┼─┼─┼─┤
a + 16 │ 123 │
└─┴─┴─┴─┴─┴─┴─┴─┘
struct
>>> from struct import pack, unpack
>>> data = pack('IIf', 2, 9, 20.0)
>>> len(data)
12
>>> data.encode('hex')
'02000000090000000000a041'
>>> unpack('IIf', data)
(2, 9, 20.0)
Array
a + 0 1 2 3 4 5
┌─┬─┬─┬─┬─┬─┐
│H e l l o !│
└─┴─┴─┴─┴─┴─┘
┌─┬─┬─┬─┬─┬─┬─┬─┐
a+0 a+4 │h |e |
├─┼─┼─┼─┼─┼─┼─┼─┤
a+8 a+12 │l |l │
├─┼─┼─┼─┼─┼─┼─┼─┤
a+16 a+20 │o |! │
└─┴─┴─┴─┴─┴─┴─┴─┘
array
>>> from array import array
>>> a = array('d', [1.0, 2.0, 3.0, 4.0, 5.0])
>>> a[3]
4.0
>>> a[3] = 44.0
>>> a
array('d', [1.0, 2.0, 3.0, 44.0, 5.0])
>>> a.itemsize
8
>>> len(a)
5
>>> len(a) * a.itemsize
40
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type 'float'>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ 1.234 e+00 │
└─┴─┴─┴─┴─┴─┴─┴─┘
↑ ↓
┌─┬─┬─┬─┬─┬─┬─┬─┐
│ 1.234 │ 5.678 │
├─┼─┼─┼─┼─┼─┼─┼─┤
>>> import numpy as np
>>> a = np.arange(0, 5)
>>> print(a)
[0 1 2 3 4]
>>> a.sum()
10
>>> print(a * a)
[ 0 1 4 9 16]
>>> print(a + 10)
[10 11 12 13 14]
tuple
>>> tup = (1, 2.0, 'Hello!')
>>> tup = (1, 2.0, 'Hello!')
>>> tup = (1, 2.0, 'Hello!')
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type tuple>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ 3 │
├─┼─┼─┼─┼─┼─┼─┼─┤
│ address │ → <int 1>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ address │ → <float 2.0>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ address │ → <str 'Hello!'>
└─┴─┴─┴─┴─┴─┴─┴─┘
len(tup)
tup[i]
>>> from collections import namedtuple
>>> City = namedtuple('City', 'name province')
>>> m = City('Montréal', 'Québec')
>>> m.name
'Montréal'
>>> m.province
'Québec'
tuple
>>> t = (1, 2, 3)
>>> len(t)
3
>>> t[9]
Traceback (most recent call last):
...
IndexError: tuple index out of range
list
─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─
#######│ list │#######
─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─
names
references
↓
─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─
#######│ list │#######
─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type list>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ 3 │
├─┼─┼─┼─┼─┼─┼─┼─┤ ┌─┬─┬─┬─┬─┬─┬─┬─┐
│ array address │ → │ address │ →
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ 4 │ │ address │ →
└─┴─┴─┴─┴─┴─┴─┴─┘ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ address │ →
├─┼─┼─┼─┼─┼─┼─┼─┤
│ - │
└─┴─┴─┴─┴─┴─┴─┴─┘
┌─┬─┬─┬─┬─┬─┬─┬─┐
│reference count│
├─┼─┼─┼─┼─┼─┼─┼─┤
│address of type│ → <type list>
├─┼─┼─┼─┼─┼─┼─┼─┤
│ 3 │
├─┼─┼─┼─┼─┼─┼─┼─┤ ┌─┬─┬─┬─┬─┬─┬─┬─┐
│ array address │ → │ address │ →
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ 4 │ │ address │ →
└─┴─┴─┴─┴─┴─┴─┴─┘ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ address │ →
├─┼─┼─┼─┼─┼─┼─┼─┤
│ - │
└─┴─┴─┴─┴─┴─┴─┴─┘
>>> list(range(10))
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> sum(range(10))
45
>>> sum(range(1000))
499500
>>> sum(range(1000000))
499999500000
>>> list(range(10))
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> sum(range(10))
45
>>> sum(range(1000))
499500
>>> sum(range(1000000))
499999500000
Appends | Reallocations | Address copies |
---|---|---|
1,000 | 27 | 8,603 |
1,000,000 | 85 | 9,692,995 |
1,000,000,000 | 143 | 9,058,551,346 |
amortization
┌───────────────────────┬───────────┐
│ 991 item addresses │ 129 slots │
└───────────────────────┴───────────┘
Speed vs space
list
# Single-item:
s[i]
s[i] = v
del s[i]
s.append(v)
s.insert(i, v)
s.pop(i)
v in s
# Several-item:
t = s[7:-7]
s == t
s.count('Montréal')
s.index('Montréal')
s.remove('Montréal')
for item in s: ...
problem
# Single-item operations that touch:
# 1 item # n items
s[i]
s[i] = v
del s[len(s)] del s[0]
s.append(v)
s.insert(len(s), v) s.insert(0, v)
s.pop() s.pop(0)
v in s
# Reverse order
while mylist:
item = list.pop(0)
print item
for item in reversed(mylist):
print item
list
copy vs view
t = s[400:500]
┌─────────────────────────┐
│ 1,000 item list │
└─────────────────────────┘
8 * 100 = 800 bytes copied
↓
┌───────────────┐
│ 100 item list │
└───────────────┘
t = s[400:500]
┌────────────────────────┐
│ 1,000 floats │
└────────────────────────┘
↑
┌─────────┐
│ slice │
│+400 <500│
└─────────┘
┌─┬─┬─┬─┬─┬─┬─┬─┐
s[0] │ address │ → item
├─┼─┼─┼─┼─┼─┼─┼─┤
s[1] │ address │ → item
├─┼─┼─┼─┼─┼─┼─┼─┤
s[2] │ address │ → item
├─┼─┼─┼─┼─┼─┼─┼─┤
s[3] │ address │ → item
├─┼─┼─┼─┼─┼─┼─┼─┤
⋮
>>> d = {}
>>> d[1] = 'un'
>>> d[2.0] = 'deux'
>>> d['three'] = 'trois'
┌─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┬─┐
│ hash │ key address │ value address │
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
│ hash │ key address │ value address │
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
│ hash │ key address │ value address │
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
│ ... │ ... │ ... │
see
Speed vs space
# Single item # Several item
d[key] d == d2
d[key] = v d.update(d2)
key in d list(d)
d.get(key) for k in d.keys(): ...
d.pop(key) for k in d.values(): ...
del d[key] for k, v in d.items(): ...
>>> from collections import defaultdict
>>>
>>> animals = defaultdict(list)
>>> animals['Mammal'].append('dog')
>>> animals['Reptile'].append('lizard')
>>> animals['Mammal'].append('cat')
{'Mammal': ['dog', 'cat'],
'Reptile': ['lizard']}
>>> # Most common 2 letters in a phrase
>>>
>>> from collections import Counter
>>> c = Counter('château frontenac, quebec')
>>> c.most_common(2)
[('e', 4), ('c', 3)]
{('google.com', 80): 7125773,
('google.com', 443): 1520951,
('heroku.com', 22): 245080,
('linkedin.com', 443): 5724124,
('rackspace.com', 443): 5002620}
project_languages = {
frozenset(['python']): 328,
frozenset(['python', 'js']): 217,
frozenset(['ruby']): 274,
frozenset(['ruby', 'js']): 261,
frozenset(['js']): 79,
}
OrderedDict
>>> from collections import OrderedDict
>>>
>>> d = dict.fromkeys('Montréal')
>>> list(d.keys())
['a', 'r', 't', 'é', 'M', 'l', 'o', 'n']
>>>
>>> d = OrderedDict.fromkeys('Montréal')
>>> list(d.keys())
['M', 'o', 'n', 't', 'r', 'é', 'a', 'l']
# Fast 1-member ops
member in t
t.add(member)
t.remove(member)
# Fast n-member ops
t | u
t & u
t - u
>>> a = [1.0, 2.2, 3.0, 3.4, 3.5, 4.0, 4.5]
>>> import bisect
>>> left = bisect.bisect_left(a, 3.0)
>>> right = bisect.bisect_right(a, 4.0)
>>> a[left:right]
[3.0, 3.4, 3.5, 4.0]
Use the deque
# Efficient operations:
>>> from collections import deque
>>> d = deque()
>>> d.append(5)
>>> d.appendleft(4)
>>> d.pop()
5
>>> d.popleft()
4
# Great for threading
# Great for multiprocessing
from queue import Queue
>>> import heapq
>>> a = [6, 2, 4, 3, 1, 5, 7]
>>> heapq.heapify(a)
>>> a
[1, 2, 4, 3, 6, 5, 7]
>>> print(' {}\n {}\n{}'
... .format(a[0:1], a[1:3], a[3:7]))
[1]
[2, 4]
[3, 6, 5, 7]
>>> heapq.heappop(a)
1
>>> heapq.heappop(a)
2
>>> heapq.heappush(a, 0)
>>> heapq.heappop(a)
0
# Linked list in the old days
┌─┬─┬─┬─┬─┬─┬─┬─┐ ┌─┬─┬─┬─┬─┬─┬─┬─┐
│ next address │ → │ next address │ →
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ data │ │ data │
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ ... │ │ ... │
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ ... │ │ ... │
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│ ... │ │ ... │
└─┴─┴─┴─┴─┴─┴─┴─┘ └─┴─┴─┴─┴─┴─┴─┴─┘
# Linked list when "everything is an object"
┌─┬─┬─┬─┬─┬─┬─┬─┐ ┌─┬─┬─┬─┬─┬─┬─┬─┐
│ next address │ → │ next address │ →
├─┼─┼─┼─┼─┼─┼─┼─┤ ├─┼─┼─┼─┼─┼─┼─┼─┤
│object address │ │object address │
└─┴─┴─┴─┴─┴─┴─┴─┘ └─┴─┴─┴─┴─┴─┴─┴─┘
↓ ↓
<object> <object>
see
@brandon_rhodes