Mihai Andrei
{
'name': 'mihai',
'presentations': [
{'where': 'ropython', 'topic': 'numpy'},
{'where': 'tvb node berlin', 'topic': 'tvb gui'}
]
}
array([[-1. , -0.86666667, -0.73333333, -0.6 ],
[-0.46666667, -0.33333333, -0.2 , -0.06666667],
[ 0.06666667, 0.2 , 0.33333333, 0.46666667],
[ 0.6 , 0.73333333, 0.86666667, 1. ]])
visualization: find structure yourself human
model reality: heat transfer
the foundation of all numerics in Python
n-dimensional, homogeneous, fixed-size, contiguous array
Installing
$ pip install numpy matplotlib jupyter
# let's meet the beast
import numpy as np
# ndarrays look like lists
a = np.array([0, 1, 2, 3])
a
array([0, 1, 2, 3])
# I can index
a[1] + a[-1]
4
# and slice
a[1:-1]
array([1, 2])
# mutable
a[0] = 4
a
array([4, 1, 2, 3])
# ok, so they are lists ... meh
a[0] = 'nope'
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-6-8bf143d5311d> in <module>() 1 # ok, so they are lists ... meh ----> 2 a[0] = 'nope' ValueError: invalid literal for int() with base 10: 'nope'
# typed
a.dtype
dtype('int64')
# fixed size
a.append(43)
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-8-f8af5ece7ee3> in <module>() 1 # fixed size ----> 2 a.append(43) AttributeError: 'numpy.ndarray' object has no attribute 'append'
Speed:
header | 4 | 1 | 2 | 3 |
# dtypes have defaults
a = np.zeros(4)
print(repr(a.dtype))
print(a)
dtype('float64') [0. 0. 0. 0.]
# but we can ask for a dtype
a = np.zeros(4, dtype=np.uint8)
a
array([0, 0, 0, 0], dtype=uint8)
# multidimensional
a = np.array([
[0, 1, 2],
[3, 4, 5]
])
a.ndim
2
# the shape is fixed, no appends
a.shape
(2, 3)
# indexing in 2d
a[1]
array([3, 4, 5])
# this works
a[1][1]
4
# this is idiomatic
a[1, 1]
4
# a cube
a = np.arange(2*3*2).reshape((2, 3, 2))
a.shape
(2, 3, 2)
a
array([[[ 0, 1], [ 2, 3], [ 4, 5]], [[ 6, 7], [ 8, 9], [10, 11]]])
a[1, :, 1]
array([ 7, 9, 11])
a = np.arange(4*4)
# reshape
a = a.reshape((4, 4))
a
array([[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [12, 13, 14, 15]])
a[1]
array([4, 5, 6, 7])
# all from dim 0, third from dim 1
a[:,2]
array([ 2, 6, 10, 14])
# the interior
a[1:-1, 1:-1]
array([[ 5, 6], [ 9, 10]])
# a more informative a[1]
a[1, :]
array([4, 5, 6, 7])
# slice bounds have defaults
a[:2, 2:]
array([[2, 3], [6, 7]])
# Slices *ARE VIEWS* in numpy!
a = np.zeros((2, 2))
a
array([[0., 0.], [0., 0.]])
b = a[:, 1]
b
array([0., 0.])
b[0] = 23
b
array([23., 0.])
a
array([[ 0., 23.], [ 0., 0.]])
a = np.array([[2, 1], [3, 0]])
a
array([[2, 1], [3, 0]])
b = np.array([[-1, 1], [0, 1]])
b
array([[-1, 1], [ 0, 1]])
a + b
array([[1, 2], [3, 1]])
# ufuncs are functions that work in this vectorized fashion
np.sin(a)
array([[0.90929743, 0.84147098], [0.14112001, 0. ]])
replace loops
a = range(3) # integers are objects on a heap
b = range(3)
ret = [] # container of generic objects
for u, v in zip(a, b): # happens in python
ret.append(u + v)
a = np.arange(3) # native machine integers
b = np.arange(3) # fixed-size container, no indirection
ret = a + b # happens in C
a = np.array([[0], [10], [20], [30]])
b = np.array([0, 1, 2])
print('a', a.shape)
print(repr(a))
print('b', b.shape)
print(repr(b))
a + b
a (4, 1) array([[ 0], [10], [20], [30]]) b (3,) array([0, 1, 2])
array([[ 0, 1, 2], [10, 11, 12], [20, 21, 22], [30, 31, 32]])
# why doesn't this broadcast?
c = np.array([[0, 10, 20, 30]])
print('c', c.shape)
c + b
c (1, 4)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-35-2645cb6512bc> in <module>() 4 print('c', c.shape) 5 ----> 6 c + b ValueError: operands could not be broadcast together with shapes (1,4) (3,)
a = np.random.randint(3, size=(3, 3))
a
array([[2, 1, 1], [1, 1, 2], [0, 2, 1]])
a > 1
array([[ True, False, False], [False, False, True], [False, True, False]])
a[a>1]
array([2, 2, 2])
a[a>1] = 4
a
array([[4, 1, 1], [1, 1, 4], [0, 4, 1]])
np.any(a == 0)
True
np.all(a < 8)
True
# cant convert to bool directly
bool(a < 8)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-42-56527d2ed582> in <module>() 1 # cant convert to bool directly ----> 2 bool(a < 8) ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
Integer indexing
a = np.array([[1, 2], [3, 4], [5, 6]])
a
array([[1, 2], [3, 4], [5, 6]])
rows = [0, 2]
columns = [0, 1]
# index arrays are paired element-wise: this picks a[0, 0] and a[2, 1]
a[rows, columns]
array([1, 6])
rows = np.array([[0, 0], [-1, -1]])
columns = np.array([[0, -1], [0, -1]])
# same logic, repeating for each dimension in the index arrays
corners = a[rows, columns]
corners
array([[1, 2], [5, 6]])
color_palette = np.array([
[0, 0, 0], [11, 11, 11], [23, 23, 23],
[3, 33, 3], [4, 4, 14], [51, 51, 51]])
color_palette[2]
array([23, 23, 23])
a = np.arange(2*3).reshape((2,3))
a
array([[0, 1, 2], [3, 4, 5]])
# transform all values to colors
a_colors = color_palette[a]
a_colors
array([[[ 0, 0, 0], [11, 11, 11], [23, 23, 23]], [[ 3, 33, 3], [ 4, 4, 14], [51, 51, 51]]])
a_colors[1, 1]
array([ 4, 4, 14])
import pandas as pd
f = pd.read_csv('lala.csv')
f
|   | Names | Births | Country |
|---|---|---|---|
| 0 | Bob | 968 | US |
| 1 | Jessica | 155 | US |
| 2 | Mary | 77 | US |
| 3 | John | 578 | US |
| 4 | Mel | 973 | US |
| 5 | Ion | 32 | RO |
| 6 | Petrica | 4 | RO |
| 7 | Mihai | 123 | RO |
# index by column names
f['Names']
0 Bob 1 Jessica 2 Mary 3 John 4 Mel 5 Ion 6 Petrica 7 Mihai Name: Names, dtype: object
f.loc[2:4, 'Names']
2 Mary 3 John 4 Mel Name: Names, dtype: object
f.columns
Index(['Names', 'Births', 'Country'], dtype='object')
group = f.groupby('Country').sum()
group
| Country | Births |
|---|---|
| RO | 159 |
| US | 2751 |