Introduction to NumPy

NumPy (Numerical Python) is a fundamental package for scientific computing in Python. It provides support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays. In this lesson, we'll explore the basics of NumPy and how to use it for numerical computations.

What is NumPy?

NumPy is the foundation for many Python data science libraries, including Pandas, SciPy, and scikit-learn. It provides:

  • A powerful N-dimensional array object
  • Sophisticated broadcasting functions
  • Tools for integrating C/C++ and Fortran code
  • Linear algebra, Fourier transform, and random number capabilities

Installing NumPy

You can install NumPy using pip:

pip install numpy

Or using conda:

conda install numpy

Importing NumPy

The conventional way to import NumPy is:

import numpy as np

NumPy Arrays

The core of NumPy is the array object (ndarray), which is a grid of values, all of the same type, indexed by a tuple of non-negative integers. The number of dimensions (available as the ndim attribute) is sometimes called the rank of the array — not to be confused with the linear-algebra rank of a matrix — and the shape of an array is a tuple of integers giving the size of the array along each dimension.

Creating Arrays

import numpy as np

# Common ways to build an array. The comment after each print shows what
# that call writes to the console.

# From a list
a = np.array([1, 2, 3, 4, 5])
print(a)  # [1 2 3 4 5]
print(type(a))  # <class 'numpy.ndarray'>

# From a list of lists (each inner list becomes one row)
b = np.array([[1, 2, 3], [4, 5, 6]])
print(b)
# [[1 2 3]
#  [4 5 6]]

# Array of zeros (the shape is passed as a tuple; values are floats by default)
c = np.zeros((3, 4))
print(c)
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]

# Array of ones
d = np.ones((2, 3))
print(d)
# [[1. 1. 1.]
#  [1. 1. 1.]]

# Array with a range of values (starts at 0, the stop value 10 is excluded)
e = np.arange(10)
print(e)  # [0 1 2 3 4 5 6 7 8 9]

# Array with evenly spaced values (5 points, both endpoints included)
f = np.linspace(0, 1, 5)
print(f)  # [0.   0.25 0.5  0.75 1.  ]

# Identity matrix
g = np.eye(3)
print(g)
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

# Random values
# No seed is set here, so the numbers differ on every run; the output below
# only illustrates the format (a 2x2 array of floats in [0.0, 1.0)).
h = np.random.random((2, 2))
print(h)
# [[0.12345678 0.23456789]
#  [0.34567891 0.45678912]]

Array Attributes

import numpy as np

# Inspect the basic attributes of a 2x3 integer array.
# The dtype is pinned to int64 so that the dtype, itemsize and nbytes values
# shown below are the same on every platform. Without it NumPy uses the
# platform default integer, which is 32-bit on Windows for NumPy versions
# before 2.0 (that would print int32, 4 and 24 instead).
a = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64)

# Shape (dimensions)
print(a.shape)  # (2, 3)

# Number of dimensions
print(a.ndim)  # 2

# Size (total number of elements)
print(a.size)  # 6

# Data type
print(a.dtype)  # int64

# Item size (bytes per element: 64 bits / 8)
print(a.itemsize)  # 8

# Total size (bytes) = size * itemsize = 6 * 8
print(a.nbytes)  # 48

Array Indexing and Slicing

import numpy as np

# Indexing, slicing and boolean masking on a 3x4 integer array.
a = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])
print(a)
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]

# Indexing: a[row, column] picks a single element
print(a[0, 0])  # 1
print(a[2, 3])  # 12

# Slicing: the first two rows, columns 1 and 2 (the stop index is excluded)
print(a[:2, 1:3])
# [[2 3]
#  [6 7]]

# All rows, specific columns
print(a[:, 1:3])
# [[ 2  3]
#  [ 6  7]
#  [10 11]]

# Specific rows, all columns (omitting the column slice keeps every column)
print(a[1:])
# [[ 5  6  7  8]
#  [ 9 10 11 12]]

# Boolean indexing: the comparison yields a True/False mask with a's shape...
mask = a > 5
print(mask)
# [[False False False False]
#  [False  True  True  True]
#  [ True  True  True  True]]

# ...and indexing with that mask returns the selected elements as a 1-D array
print(a[mask])
# [ 6  7  8  9 10 11 12]

Array Operations

NumPy provides a large set of mathematical functions that operate on arrays.

Arithmetic Operations

import numpy as np

# Arithmetic operators work element by element on arrays of the same shape.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Addition
print(a + b)  # [5 7 9]

# Subtraction
print(a - b)  # [-3 -3 -3]

# Multiplication (element-wise, not a matrix product)
print(a * b)  # [ 4 10 18]

# Division (true division: the result is a float array)
print(a / b)  # [0.25 0.4  0.5 ]

# Power
print(a ** 2)  # [1 4 9]

# With scalars (the scalar is applied to every element)
print(a + 2)  # [3 4 5]
print(a * 2)  # [2 4 6]

Universal Functions (ufuncs)

import numpy as np

# Universal functions apply a mathematical function to every element.
a = np.array([0, np.pi / 4, np.pi / 2])

# Trigonometric functions
# cos(pi/2) and tan(pi/2) come out as a tiny / huge number rather than 0 /
# infinity because pi/2 cannot be represented exactly as a float.
sines = np.sin(a)
cosines = np.cos(a)
tangents = np.tan(a)
print(sines)  # [0.         0.70710678 1.        ]
print(cosines)  # [1.00000000e+00 7.07106781e-01 6.12323400e-17]
print(tangents)  # [0.00000000e+00 1.00000000e+00 1.63312394e+16]

# Exponential and logarithmic functions
b = np.array([1, 2, 3])
exponentials = np.exp(b)
natural_logs = np.log(b)
base10_logs = np.log10(b)
print(exponentials)  # [ 2.71828183  7.3890561  20.08553692]
print(natural_logs)  # [0.         0.69314718 1.09861229]
print(base10_logs)  # [0.         0.30103    0.47712125]

# Rounding functions
# np.round sends exact halves to the nearest even value (3.5 -> 4.).
c = np.array([1.2, 2.7, 3.5, 4.8])
rounded_down = np.floor(c)
rounded_up = np.ceil(c)
rounded_nearest = np.round(c)
print(rounded_down)  # [1. 2. 3. 4.]
print(rounded_up)   # [2. 3. 4. 5.]
print(rounded_nearest)  # [1. 3. 4. 5.]

Aggregation Functions

import numpy as np

# Reductions over a 2x3 array. Each reduction is written in its method form
# (a.sum(), a.min(), ...), which gives the same result as the matching
# function np.sum(a), np.min(a), ...; median is only available as a function.
a = np.array([[1, 2, 3], [4, 5, 6]])

# Sum: everything, then down the columns (axis=0), then along the rows (axis=1)
print(a.sum())  # 21
print(a.sum(axis=0))  # [5 7 9]
print(a.sum(axis=1))  # [ 6 15]

# Min and max
print(a.min())  # 1
print(a.max())  # 6
print(a.min(axis=0))  # [1 2 3]
print(a.max(axis=1))  # [3 6]

# Mean and median
print(a.mean())  # 3.5
print(np.median(a))  # 3.5

# Standard deviation and variance (population formulas: default ddof=0)
print(a.std())  # 1.707825127659933
print(a.var())  # 2.9166666666666665

Array Reshaping

import numpy as np

# Rearranging the same 12 values into different shapes.
a = np.arange(12)
print(a)  # [ 0  1  2  3  4  5  6  7  8  9 10 11]

# Reshape to 2D array (3 rows x 4 columns, filled row by row)
b = np.reshape(a, (3, 4))
print(b)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# Reshape to 3D array (2 blocks of 2 rows x 3 columns)
c = np.reshape(a, (2, 2, 3))
print(c)
# [[[ 0  1  2]
#   [ 3  4  5]]
#
#  [[ 6  7  8]
#   [ 9 10 11]]]

# Flatten array (returns a 1-D copy)
print(b.flatten())  # [ 0  1  2  3  4  5  6  7  8  9 10 11]

# Transpose (rows become columns)
print(np.transpose(b))
# [[ 0  4  8]
#  [ 1  5  9]
#  [ 2  6 10]
#  [ 3  7 11]]

Broadcasting

Broadcasting is a powerful mechanism that allows NumPy to work with arrays of different shapes when performing arithmetic operations.

import numpy as np

# Broadcasting with scalars: the scalar is combined with every element
a = np.array([1, 2, 3])
shifted = a + 2
print(shifted)  # [3 4 5]

# Broadcasting with arrays: the (3,) row is added to each row of the (2, 3) array
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])
row_wise_sum = a + b
print(row_wise_sum)
# [[11 22 33]
#  [14 25 36]]

# More complex broadcasting: the (2, 1) column is added to each column
a = np.array([[1, 2, 3], [4, 5, 6]])  # Shape: (2, 3)
b = np.array([10, 20])[:, np.newaxis]  # Shape: (2, 1)
column_wise_sum = a + b
print(column_wise_sum)
# [[11 12 13]
#  [24 25 26]]

Linear Algebra

NumPy provides a comprehensive set of linear algebra functions.

import numpy as np

# Matrix multiplication
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
product = np.dot(a, b)
print(product)
# [[19 22]
#  [43 50]]

# Using the @ operator (Python 3.5+); same result as np.dot for 2-D arrays
operator_product = a @ b
print(operator_product)
# [[19 22]
#  [43 50]]

# Determinant (computed in floating point, hence the tiny rounding error)
determinant = np.linalg.det(a)
print(determinant)  # -2.0000000000000004

# Inverse
inverse = np.linalg.inv(a)
print(inverse)
# [[-2.   1. ]
#  [ 1.5 -0.5]]

# Eigenvalues and eigenvectors
# Column i of `eigenvectors` is the eigenvector belonging to eigenvalues[i].
eig_result = np.linalg.eig(a)
eigenvalues, eigenvectors = eig_result
print(eigenvalues)  # [-0.37228132  5.37228132]
print(eigenvectors)
# [[-0.82456484 -0.41597356]
#  [ 0.56576746 -0.90937671]]

# Solving linear equations: Ax = b
#   1*x0 + 2*x1 = 5
#   3*x0 + 4*x1 = 6
a = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])
x = np.linalg.solve(a, b)
print(x)  # [-4.  4.5]

Random Number Generation

import numpy as np

# Set seed for reproducibility
# All calls below draw from the same global generator, so each result depends
# on the seed AND on every draw made before it. Only the first output is shown
# exactly; the outputs of the later calls are illustrative examples of the
# format, not the literal values this sequence of calls prints.
np.random.seed(42)

# Random integers (lower bound 1 included, upper bound 10 excluded)
print(np.random.randint(1, 10, size=5))  # [7 4 8 5 7]

# Random floats between 0 and 1
print(np.random.random(size=5))  # e.g. [0.37454012 0.95071431 0.73199394 0.59865848 0.15601864]

# Random floats from normal distribution (mean 0, standard deviation 1)
print(np.random.normal(0, 1, size=5))  # e.g. [ 0.42891263  1.26593626 -0.8667404  -0.67888615 -0.09470897]

# Random floats from uniform distribution (between -1 and 1)
print(np.random.uniform(-1, 1, size=5))  # e.g. [ 0.14092422 -0.19771088 -0.52249697  0.6991793  -0.29984477]

# Shuffle an array (in place: shuffle() returns None and reorders `a` itself)
a = np.arange(10)
np.random.shuffle(a)
print(a)  # e.g. [2 8 4 9 1 6 7 3 0 5]

# Random choice (p gives one selection probability per candidate, summing to 1)
print(np.random.choice([1, 2, 3, 4, 5], size=3))  # e.g. [3 2 1]
print(np.random.choice([1, 2, 3, 4, 5], size=3, p=[0.1, 0.2, 0.3, 0.2, 0.2]))  # e.g. [3 3 5]

File I/O with NumPy

import numpy as np

# Round-trip arrays through NumPy's three file formats. The files are created
# in the current working directory.

# Save array to file (binary .npy format, one array per file)
a = np.array([[1, 2, 3], [4, 5, 6]])
np.save('array.npy', a)

# Load array from file
b = np.load('array.npy')
print(b)
# [[1 2 3]
#  [4 5 6]]

# Save multiple arrays to a single file (the keyword names become the keys)
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
np.savez('arrays.npz', a=a, b=b)

# Load multiple arrays
# For .npz files np.load returns a lazy archive object that keeps the file
# open; the with-block closes it once the arrays have been read, so no file
# handle is leaked.
with np.load('arrays.npz') as data:
    print(data['a'])  # [1 2 3]
    print(data['b'])  # [4 5 6]

# Save as text file
a = np.array([[1, 2, 3], [4, 5, 6]])
np.savetxt('array.txt', a, delimiter=',')

# Load from text file (values are parsed as floats by default)
c = np.loadtxt('array.txt', delimiter=',')
print(c)
# [[1. 2. 3.]
#  [4. 5. 6.]]

Performance Comparison

NumPy operations are significantly faster than equivalent operations in pure Python, especially for large arrays.

import numpy as np
import time

# Time the same "multiply every element by 2" task with a Python list and
# with a NumPy array.

# Python list
size = 1000000
python_list = list(range(size))

# NumPy array
numpy_array = np.arange(size)

# Multiply each element by 2
# time.perf_counter() is the high-resolution clock intended for measuring
# short durations. time.time() is a wall clock whose resolution can be coarse
# enough for the NumPy measurement to come out as 0, which would make the
# ratio below raise ZeroDivisionError.
start_time = time.perf_counter()
python_result = [x * 2 for x in python_list]
python_time = time.perf_counter() - start_time
print(f"Python time: {python_time:.6f} seconds")

start_time = time.perf_counter()
numpy_result = numpy_array * 2
numpy_time = time.perf_counter() - start_time
print(f"NumPy time: {numpy_time:.6f} seconds")
print(f"NumPy is {python_time / numpy_time:.1f}x faster")

When to Use NumPy

NumPy is particularly useful for:

  • Mathematical and numerical operations on large arrays or matrices
  • Linear algebra operations
  • Random number generation
  • Integration with C/C++ and Fortran code
  • As a foundation for other scientific libraries (Pandas, SciPy, scikit-learn, etc.)

Try experimenting with NumPy in the code playground below!

Quick Quiz

Which function would you use to create a NumPy array with evenly spaced values within a given interval?

Code Playground

Code output will appear here...