Numpy¶

import numpy as np

Create Numpy Array from Python List¶

x = np.array([1, 2, 3, 4, 5])
print(x)
print(type(x))
print(x.dtype)
print(x.shape)
print(x.size)

x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
print(x)
print(type(x))
print(x.dtype)
print(x.shape)
print(x.size)

[1 2 3 4 5]
<class 'numpy.ndarray'>
int64
(5,)
5
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
<class 'numpy.ndarray'>
int64
(4, 3)
12

Create Numpy Array from Built-in Functions¶

x = np.zeros((3, 4))
print(x)
print(x.dtype)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
float64

x = np.ones((3, 4), dtype=int)
print(x)
print(x.dtype)

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]
int64

x = np.full((3, 4), 5)
print(x)

[[5 5 5 5]
 [5 5 5 5]
 [5 5 5 5]]

x = np.eye(5, dtype=int)
print(x)

[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]]

x = np.diag([10, 20, 30, 40])
print(x)

[[10  0  0  0]
 [ 0 20  0  0]
 [ 0  0 30  0]
 [ 0  0  0 40]]

x = np.arange(4, 10)
print(x)

[4 5 6 7 8 9]

x = np.arange(1, 20, 3)
print(x)

[ 1  4  7 10 13 16 19]

x = np.linspace(1, 20, 3)
print(x)

[ 1.  10.5 20. ]

x = np.arange(20)
print(x)

x = np.reshape(x, (4, 5))
print(x)

x = np.arange(20).reshape(4, 5)
print(x)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]

# Defaults to range [0, 1)
x = np.random.random((3, 3))
print(x)

x = np.random.randint(4, 10, (3, 3))
print(x)

[[0.22087021 0.53229498 0.58663932]
 [0.21300366 0.86993844 0.56059265]
 [0.86554777 0.38157681 0.78204005]]
[[7 5 9]
 [7 4 7]
 [9 9 7]]

# mean = 0, std = 0.1
x = np.random.normal(0, 0.1, (5, 5))
print(x)

print(x.mean())
print(x.std())

[[ 0.0122765  -0.12708003  0.11426993 -0.04997364 -0.02526457]
 [-0.04439879 -0.12928117 -0.07242298  0.060275    0.06836317]
 [-0.02163878  0.15118322 -0.09682757 -0.04438684  0.11186937]
 [ 0.03933767 -0.08154594  0.00507315 -0.05448884  0.06592437]
 [ 0.06140125 -0.0002377  -0.07852702 -0.02126833  0.17878217]]
0.0008565443649252224
0.08241037010812466

Accessing, Deleting and Inserting Elements into NDArrays¶

x = np.array([1, 2, 3, 4, 5])

print(x[0])
print(x[2])
print(x[-1])
print(x[-3])

1
3
5
3

## Get  diagonal of a 2d array

x = np.arange(25).reshape(5, 5)
print(x)

print(np.diag(x))
print(np.diag(x, k=1))
print(np.diag(x, k=-2))

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
[ 0  6 12 18 24]
[ 1  7 13 19]
[10 16 22]

## Get unique elements of an array

x = np.array([1, 2, 3, 4, 2, 1, 1, 2, 5])
print(np.unique(x))

[1 2 3 4 5]

x = np.arange(1, 10).reshape(3, 3)

print(x)
print(x[0, 0])
print(x[1, 0])
print(x[2, 1])

## Modify element
x[2, 2] = -9
print(x)

[[1 2 3]
 [4 5 6]
 [7 8 9]]
1
4
8
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8 -9]]

## Delete Rows by Index

x = np.arange(9).reshape(3, 3)
print(x)
print(np.delete(x, [0, 2], axis=0))

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[3 4 5]]

## Delete Columns by Index

x = np.arange(9).reshape(3, 3)
print(x)
print(np.delete(x, [0, 2], axis=1))

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[1]
 [4]
 [7]]

## Append Row

x = np.arange(9).reshape(3, 3)
print(x)
print(np.append(x, [[9, 10, 11]], axis=0))

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]

## Append Column

x = np.arange(9).reshape(3, 3)
print(x)
print(np.append(x, [[9], [10], [11]], axis=1))

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 0  1  2  9]
 [ 3  4  5 10]
 [ 6  7  8 11]]

## Insert Elements - 1D / Rank 1 Arrays

x = np.array([1, 2, 5, 6, 7, 8, 9, 10])
print(x)
print(np.insert(x, 2, [3, 4]))

[ 1  2  5  6  7  8  9 10]
[ 1  2  3  4  5  6  7  8  9 10]

## Insert Row at Specified Index - 2D Array

x = np.array([[1, 2, 3], [7, 8, 9]])
print(x)
print(np.insert(x, 1, [4, 5, 6], axis=0))

[[1 2 3]
 [7 8 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]

x = np.array([[1, 2], [4, 5]])
print(x)
print(np.insert(x, 2, [3, 6], axis=1))
print(np.insert(x, 2, 9, axis=1))

[[1 2]
 [4 5]]
[[1 2 3]
 [4 5 6]]
[[1 2 9]
 [4 5 9]]

## Stack 2 Arrays - Vertically

x = np.array([1, 2])
y = np.array([[3, 4], [5, 6]])
print(f"x=\n{x}")
print(f"y=\n{y}")
print(f"vstack=\n {np.vstack((x, y))}")

x=
[1 2]
y=
[[3 4]
 [5 6]]
vstack=
 [[1 2]
 [3 4]
 [5 6]]

## Stack 2 Arrays - Horizontally

x = np.array([[3], [6]])
y = np.array([[1, 2], [4, 5]])
print(f"x=\n{x}")
print(f"y=\n{y}")
print(f"hstack=\n {np.hstack((y, x))}")

x=
[[3]
 [6]]
y=
[[1 2]
 [4 5]]
hstack=
 [[1 2 3]
 [4 5 6]]

Slicing NDArrays¶

Slicing returns a "view" onto the original array rather than a copy, so modifying the slice also modifies the original array. To get an independent copy, call the copy() method on the slice.

## 4x5 grid holding the integers 1..20
x = np.reshape(np.arange(1, 21), (4, 5))
print(x)

## Top-left 2x2 corner
print(x[:2, :2])

## Notice the subtle difference between the following two:
## a slice keeps the first column as a 2D array of shape (4, 1) ...
print(x[:, :1])
## ... while a plain integer index drops that axis and gives a 1D array of length 4
print(x[:, 0])

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]
[[1 2]
 [6 7]]
[[ 1]
 [ 6]
 [11]
 [16]]
[ 1  6 11 16]

Boolean Indexing¶

x = np.arange(25).reshape(5, 5)
print(x)

print(x[(x > 10) & (x < 17)])

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]
[11 12 13 14 15 16]

Set Operations¶

x = np.array([1, 2, 3, 4, 5])
y = np.array([6, 8, 3, 2, 9])

print(np.intersect1d(x, y))
print(np.setdiff1d(x, y))
print(np.union1d(x, y))

[2 3]
[1 4 5]
[1 2 3 4 5 6 8 9]

Sorting¶

x = np.random.randint(1, 11, size=(10, ))
print(x)

## Out-of-place sorting
print(f"oop sorted= \n {np.sort(x)}")
print(f"original= \n {x}")

## In-place sorting
x.sort()
print(f"ip sorted= \n {x}")

[9 1 8 9 6 1 7 6 2 8]
oop sorted= 
 [1 1 2 6 6 7 8 8 9 9]
original= 
 [9 1 8 9 6 1 7 6 2 8]
ip sorted= 
 [1 1 2 6 6 7 8 8 9 9]

Pandas¶

import pandas as pd

Pandas Series¶

Create¶

### With default integer indices
groceries = pd.Series(data=[30, 6, 'Foo', 'Bar'])
print(groceries)

### With custom indices
groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['egg', 'apples', 'milk', 'bread'])
print(groceries)

0     30
1      6
2    Foo
3    Bar
dtype: object
egg        30
apples      6
milk      Yes
bread      No
dtype: object

Attributes¶

print(groceries.shape)
print(groceries.ndim)
print(groceries.size)
print(groceries.index)
print(groceries.values)
print('bananas' in groceries)
print('apples' in groceries)

(4,)
1
4
Index(['egg', 'apples', 'milk', 'bread'], dtype='object')
[30 6 'Yes' 'No']
False
True

Accessing Data¶

groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['egg', 'apples', 'milk', 'bread'])
## Single element by label
print(groceries['egg'])

print('====\n')

## By labels
print(groceries[['egg', 'apples']])
print(groceries.loc[['egg', 'apples']])
print('====\n')

## By position (negative positions count from the end).
## Plain [] with integers on a label-indexed Series is deprecated in recent
## pandas, so use the explicitly positional accessors instead.
print(groceries.take([0, -1]))
print(groceries.iloc[[0, -1]])

30
====

egg       30
apples     6
dtype: object
egg       30
apples     6
dtype: object
====

egg      30
bread    No
dtype: object
egg      30
bread    No
dtype: object

Modify Series¶

## Change Element Values
groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['egg', 'apples', 'milk', 'bread'])
groceries[['egg']] = 31
print(groceries)

egg        31
apples      6
milk      Yes
bread      No
dtype: object

## Drop Elements - Out-of-Place
groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['egg', 'apples', 'milk', 'bread'])
print(groceries.drop(['apples']))
print(groceries)

egg       30
milk     Yes
bread     No
dtype: object
egg        30
apples      6
milk      Yes
bread      No
dtype: object

## Drop Elements - In-Place
groceries = pd.Series(data=[30, 6, 'Yes', 'No'], index=['egg', 'apples', 'milk', 'bread'])
groceries.drop(['apples'], inplace=True)
print(groceries)

egg       30
milk     Yes
bread     No
dtype: object

Arithmetic Operations¶

fruits= pd.Series(data = [10, 6, 3,], index = ['apples', 'oranges', 'bananas'])
fruits + 1

apples     11
oranges     7
bananas     4
dtype: int64

np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

fruits[['bananas', 'oranges']] * 10

bananas    30
oranges    60
dtype: int64

Pandas DataFrames¶

Create¶

# We create a dictionary of Pandas Series 
items = {'Bob' : pd.Series(data = [245, 25, 55]),
         'Alice' : pd.Series(data = [40, 110, 500, 45])}

# We print the type of items to see that it is a dictionary
print(type(items))
shopping_carts = pd.DataFrame(items)
shopping_carts

<class 'dict'>

## Create DF from csv
# df = pd.read_csv('myfile.csv')

# We create a dictionary of Pandas Series 
items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants'])}

# We print the type of items to see that it is a dictionary
print(type(items))
shopping_carts = pd.DataFrame(items)
shopping_carts

<class 'dict'>

## Creating DF Using Subset of Dict

# We Create a DataFrame that only has selected items for Alice
alice_sel_shopping_cart = pd.DataFrame(items, index = ['glasses', 'bike'], columns = ['Alice'])
alice_sel_shopping_cart

Attributes¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants'])}
shopping_carts = pd.DataFrame(items)

shopping_carts.shape

(5, 2)

shopping_carts.ndim

2

shopping_carts.columns

Index(['Bob', 'Alice'], dtype='object')

shopping_carts.values

array([[245., 500.],
       [ nan,  40.],
       [ nan, 110.],
       [ 25.,  45.],
       [ 55.,  nan]])

Accessing Data¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450], index = ['book', 'glasses', 'bike', 'pants'])}

df = pd.DataFrame(items)
df

Access column(s) by label¶

df[['Bob', 'Alice']]

df.loc[:, ['Bob', 'Alice']]

Access column(s) by index¶

display(df)
df.iloc[:, [0, 2]]

Access row(s) by label¶

df.loc[['bike', 'pants']]

Access row(s) by index¶

display(df)
df.iloc[[0, 2]]

Get N rows from a DF¶

df[:3]

Get N random rows from a DF¶

df.sample(n=2)

Access element by row and column label¶

df['Alice']['bike']  # Column label always comes first

500.0

Get all rows where column value satisfies condition¶

display(df)
df.loc[df['Bob'] > 40]

Modify DF¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450], index = ['book', 'glasses', 'bike', 'pants'])}

df = pd.DataFrame(items)
df

Add column¶

df['Dan'] = [1, 2, 3, 4, 5]
df

Append columns from a DF to another DF¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450], index = ['book', 'glasses', 'bike', 'pants'])}

df = pd.DataFrame(items)
df

items_new = {'Dan' : pd.Series(data = [1, 2, 3], index = ['bike', 'pants', 'watch']),}
df_new = pd.DataFrame(items_new)
df.join(df_new)

Insert column at index¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450], index = ['book', 'glasses', 'bike', 'pants'])}

df = pd.DataFrame(items)

df.insert(1, 'Dan', [1, 2, 3, 4, 5])
df

Add column using sum of previous columns values¶

df['Total'] = df['Bob'] + df['Alice'] + df['Charlie'] + df['Dan']
df

Add rows¶

## Build a one-row DataFrame whose index label is the new row's name
new_item = {'Bob': 1, 'Alice': 2, 'Charlie': 2}
new_df = pd.DataFrame(new_item, index = ['phones'])
display(new_df)

## DataFrame.append() was removed in pandas 2.0; pd.concat() stacks the rows
## the same way (columns missing from new_df are filled with NaN) and returns
## a new DataFrame, leaving df untouched.
display(pd.concat([df, new_df]))

Delete column¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450], index = ['book', 'glasses', 'bike', 'pants'])}

df = pd.DataFrame(items)
display(df)
df.pop('Bob')
display(df)

Delete multiple columns¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450], index = ['book', 'glasses', 'bike', 'pants'])}

df = pd.DataFrame(items)
display(df)

display(df.drop(['Bob', 'Alice'], axis=1)) # 1 = columns

Delete multiple rows¶

display(df.drop(['watch', 'book'], axis=0)) # axis=0 => row / index

Transform values of selected columns¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500], index = ['bike', 'pants', 'watch']),
         'Charlie': pd.Series(data = [45, 90, 70], index = ['bike', 'pants', 'watch'])}

df = pd.DataFrame(items)
display(df)

## apply() runs the function on each selected column; any column-wise
## function can be used here (for plain arithmetic, df[cols] * 100 is equivalent).
columns_to_transform = ['Bob', 'Charlie']
df[columns_to_transform] = df[columns_to_transform].apply(lambda x: x * 100)
display(df)

#### Substitute values in columns of a DF
df.replace([40, 7000], ['Foo', 'Bar'])

Dealing with NaN¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500, 45], index = ['book', 'glasses', 'bike', 'pants']),
         'Charlie': pd.Series(data = [45, 90, 70, 450, 1], index = ['book', 'glasses', 'bike', 'pants', 'watch'])}

df = pd.DataFrame(items)
display(df)

## Counting NaNs
df.isnull().sum().sum()

4

## Counting non-NaNs
df.count().sum()

11

## Drop rows with NaNs
display(df.dropna(axis=0))

## Drop columns with NaNs
display(df.dropna(axis=1))

## Replace all NaNs with 0
display(df.fillna(0))

## Forward fill NaNs (each NaN takes the value from the previous row)
display(df)
## ffill() is the direct spelling of fillna(method='ffill'), whose `method`
## argument is deprecated in recent pandas. Use bfill() to fill backwards, or
## axis=1 to fill along columns. 'linear' is not a fill method: linear
## interpolation is done with df.interpolate(method='linear').
display(df.ffill(axis=0))

Statistical Analysis¶

items = {'Bob' : pd.Series(data = [245, 25, 55], index = ['bike', 'pants', 'watch']),
         'Alice' : pd.Series(data = [40, 110, 500], index = ['bike', 'pants', 'watch']),
         'Charlie': pd.Series(data = [45, 90, 70], index = ['bike', 'pants', 'watch'])}

df = pd.DataFrame(items)
display(df)

## Describe statistical information of DF
df.describe()

## Describe statistical information of a single column
df['Bob'].describe()

count      3.000000
mean     108.333333
std      119.303534
min       25.000000
25%       40.000000
50%       55.000000
75%      150.000000
max      245.000000
Name: Bob, dtype: float64

df.mean()

Bob        108.333333
Alice      216.666667
Charlie     68.333333
dtype: float64

df.max()

Bob        245
Alice      500
Charlie     90
dtype: int64

Data Visualisation¶

import pandas as pd
import seaborn as sb

df = pd.read_csv('pokemon.csv')
display(df.head())

Univariate Data¶

Categorical data frequency/count as bar chart¶

sb.countplot(data = df, x = 'generation_id')

<matplotlib.axes._subplots.AxesSubplot at 0x1a1e8d9f10>

## Single color bars
base_color = sb.color_palette()[0]
sb.countplot(data = df, x = 'generation_id', color = base_color)

<matplotlib.axes._subplots.AxesSubplot at 0x1a1e7c1e10>

## Sort left to right
gen_order = df['generation_id'].value_counts().index
sb.countplot(data = df, x = 'generation_id', order = gen_order)

<matplotlib.axes._subplots.AxesSubplot at 0x1a1e21bf10>

## Rotate x tick labels

## Without rotation
sb.countplot(data = df, x = 'type_1')

<matplotlib.axes._subplots.AxesSubplot at 0x1a1f25b1d0>

## With rotation
import matplotlib.pyplot as plt

plt.xticks(rotation = 90)

sb.countplot(data = df, x = 'type_1')

<matplotlib.axes._subplots.AxesSubplot at 0x1a1f5571d0>

## Plot Y Bars
sb.countplot(data = df, y = 'type_1')

<matplotlib.axes._subplots.AxesSubplot at 0x1a20035790>

Categorical data relative frequency as bar chart¶

import numpy as np
import matplotlib.pyplot as plt

n_points = df.shape[0]
max_count = df['generation_id'].value_counts().max()
max_percent = max_count / n_points

tick_props = np.arange(0, max_percent,  0.05)
tick_names = ['{:0.2f}'.format(v) for v in tick_props]

sb.countplot(data = df, x = 'generation_id')
plt.yticks(tick_props * n_points, tick_names)
plt.ylabel('proportion')

Text(0, 0.5, 'proportion')

Using Barplot to visualise processed data (not already stored as a column value)¶

## Count the missing values per column once and reuse the result
na_counts = df.isna().sum()
## x and y are passed by keyword: seaborn 0.12+ no longer accepts them positionally
sb.barplot(x = na_counts.index.values, y = na_counts.values)
plt.xticks(rotation = 90)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13]),
 <a list of 14 Text xticklabel objects>)

Numerical data histograms¶

df.head()

plt.hist(data = df, x = 'speed', bins = 20);

bins = np.arange(0, df['speed'].max()+5, 5)
plt.hist(data = df, x = 'speed', bins = bins);

sb.distplot(df['speed']);

sb.distplot(df['speed'], kde=False);

Subplots (Stack Plots Horizontally)¶

import matplotlib.pyplot as plt

plt.figure(figsize = [15, 5])

plt.subplot(1, 2, 1)                    # 1 row, 2 cols, subplot 1
sb.distplot(df['speed'], kde=False);

plt.subplot(1, 2, 2)                    # 1 row, 2 cols, subplot 2
sb.distplot(df['speed']);

Plot Subset of Data (Axis Range Limits)¶

plt.hist(data = df, x = 'height');

plt.hist(data = df, x = 'height');
plt.xlim((0, 2))

(0, 2)

Axis Transformations (Log Scale)¶

## Original plot (with linear scale)
plt.hist(data = df, x = 'weight');

## Plots with log scales for x-axis
plt.figure(figsize = [15, 5])

plt.subplot(1, 2, 1)    
sb.distplot(df['weight'], kde=False)
plt.xscale('log')

plt.subplot(1, 2, 2)    
plt.hist(data = df, x = 'weight');
plt.xscale('log')

## Changing x range, whilst in log scale to better visualise data distribution
plt.xscale('log')

## Bin edges are evenly spaced in log10 space (step 0.1) and converted back
## with 10 ** ...; the names avoid shadowing the built-in min()/max().
log_min = np.log10(df['weight'].min())
log_max = np.log10(df['weight'].max())
bins = 10 ** np.arange(log_min, log_max + 0.1, 0.1)

plt.hist(data = df, x = 'weight', bins = bins);

Bivariate Data¶

Pairwise Relationship Between Numerical Columns¶

sb.pairplot(df, hue='generation_id');

Categorical data grouped by another label¶

df.head()

import matplotlib.pyplot as plt

chart = sb.catplot(x='type_1', kind='count', hue='generation_id', data=df, height=10, aspect=10/7.5);
plt.xticks(rotation = 90);

Anaconda¶

List envs¶

conda info --envs

Activate env¶

conda activate <env_name>

Update all packages¶

## the long option needs two dashes
conda upgrade --all

Install package¶

conda install package_name

## specifying package version
conda install numpy=1.10

Remove package¶

conda remove package_name

Search package¶

## quote the pattern so the shell does not expand the wildcards itself
conda search "*search_term*"

List packages¶

conda list

Jupyter¶

Convert notebook to html¶

jupyter nbconvert --to html notebook.ipynb

# Other formats
# https://nbconvert.readthedocs.io/en/latest/usage.html

Add TOC¶

Ref: https://towardsdatascience.com/jupyter-tools-to-increase-productivity-7b3c6b90be09

	Bob	Alice
bike	245.0	500.0
book	NaN	40.0
glasses	NaN	110.0
pants	25.0	45.0
watch	55.0	NaN

	Bob	Charlie
bike	245.0	70.0
book	NaN	45.0
glasses	NaN	90.0
pants	25.0	450.0
watch	55.0	NaN

	Bob	Alice	Charlie
count	3.000000	3.000000	3.000000
mean	108.333333	216.666667	68.333333
std	119.303534	247.857486	22.546249
min	25.000000	40.000000	45.000000
25%	40.000000	75.000000	57.500000
50%	55.000000	110.000000	70.000000
75%	150.000000	305.000000	80.000000
max	245.000000	500.000000	90.000000

	id	species	generation_id	height	weight	base_experience	type_1	type_2	hp	attack	defense	speed	special-attack	special-defense
0	1	bulbasaur	1	0.7	6.9	64	grass	poison	45	49	49	45	65	65
1	2	ivysaur	1	1.0	13.0	142	grass	poison	60	62	63	60	80	80
2	3	venusaur	1	2.0	100.0	236	grass	poison	80	82	83	80	100	100
3	4	charmander	1	0.6	8.5	62	fire	NaN	39	52	43	65	60	50
4	5	charmeleon	1	1.1	19.0	142	fire	NaN	58	64	58	80	80	65

	Bob	Alice	Charlie
bike	24500	40	4500
pants	2500	110	9000

	Bob	Alice	Charlie
bike	24500	40	4500
pants	2500	110	9000
watch	5500	500	7000

	Bob	Alice	Charlie
bike	24500	40	4500
pants	2500	110	9000
watch	5500	500	7000

Up-to-date version: Python Cheat Sheet

Table of Contents

Numpy¶

Create Numpy Array from Python List¶

Create Numpy Array from Built-in Functions¶

Accessing, Deleting and Inserting Elements into NDArrays¶

Slicing NDArrays¶

Boolean Indexing¶

Set Operations¶

Sorting¶

Pandas¶

Pandas Series¶

Create¶

Attributes¶

Accessing Data¶

Modify Series¶

Arithmetic Operations¶

Pandas DataFrames¶

Create¶

Attributes¶

Accessing Data¶

Access column(s) by label¶

Access column(s) by index¶

Access row(s) by label¶

Access row(s) by index¶

Get N rows from a DF¶

Get N random rows from a DF¶

Access element by row and column label¶

Get all rows where column value satisfies condition¶

Modify DF¶

Add column¶

Append columns from a DF to another DF¶

Insert column at index¶

Add column using sum of previous columns values¶

Add rows¶

Delete column¶

Delete multiple columns¶

Delete multiple rows¶

Transform values of selected columns¶

Dealing with NaN¶

Statistical Analysis¶

Data Visualisation¶

Univariate Data¶

Categorical data frequency/count as bar chart¶

Categorical data relative frequency as bar chart¶

Using Barplot to visualise processed data (not already stored as a column value)¶

Numerical data histograms¶

Subplots (Stack Plots Horizontally)¶

Plot Subset of Data (Axis Range Limits)¶

Axis Transformations (Log Scale)¶

Bivariate Data¶

Pairwise Relationship Between Numerical Columns¶

Categorical data grouped by another label¶

Anaconda¶

List envs¶

Activate env¶

Update all packages¶

Install package¶

Remove package¶

Search package¶

List packages¶

Jupyter¶

Convert notebook to html¶

Add TOC¶