🥦 🥩

Search & sort

Let’s test the display of jupyter notebooks in my blog. I’m programming several searching and sorting algorithms and compare their runtimes.

import random
import timeit
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_theme(style="ticks", rc={"font.sans-serif": "Inter"})

random.seed(42)

Searching algorithms

# linear search | O(n)

def linear_search(unordered_list, search_value):
    for index in range(len(unordered_list)):
        if unordered_list[index] == search_value:
            return True
    return False
# binary search (only applies to ordered lists) | O(logn)

def binary_search(ordered_list, search_value):
    first = 0
    last = len(ordered_list) - 1

    while first <= last:
        middle = (first + last)//2
        if search_value == ordered_list[middle]:
            return True
        elif search_value < ordered_list[middle]:
            last = middle - 1
        else: 
            first = middle + 1

Actually, I like the recursive implementation even more:

def binary_search_recursive(ordered_list, search_value):
  # define the base case
  if len(ordered_list) == 0:
    return False
  else:
    middle = len(ordered_list)//2
    # check whether the search value equals the value in the middle
    if search_value == ordered_list[middle]:
        return True
    elif search_value < ordered_list[middle]:
        # call recursively with the left half of the list
        return binary_search_recursive(ordered_list[:middle], search_value)
    else:
        # call recursively with the right half of the list
        return binary_search_recursive(ordered_list[middle+1:], search_value)

Sorting algorithms

# bubble sort | O(n²)

def bubble_sort(my_list):
    list_length = len(my_list)
    is_sorted = False
    while not is_sorted:
        is_sorted = True
        for i in range(list_length-1):
            if my_list[i] > my_list[i+1]:
                my_list[i], my_list[i+1] = my_list[i+1], my_list[i]
                is_sorted = False
        list_length -= 1
    return my_list
# selection sort | O(n²)

def selection_sort(my_list):
    list_length = len(my_list)
    for i in range(list_length-1):
        lowest = my_list[i]
        index = i
        for j in range(i+1, list_length):
            if my_list[j] < lowest:
                lowest = my_list[j]
                index = j
        my_list[i], my_list[index] = my_list[index], my_list[i]
    return my_list
# insertion sort | O(n²)

def insertion_sort(my_list):
    for i in range(1, len(my_list)):
        number_to_order = my_list[i]
        j = i - 1
        while j >= 0 and number_to_order < my_list[j]:
            my_list[j+1] = my_list[j]
            j -= 1
        my_list[j+1] = number_to_order
    return my_list
# merge sort | O(n logn)

def merge_sort(my_list):
    if len(my_list) > 1:
        mid = len(my_list)//2
        left_half = my_list[:mid]
        right_half = my_list[mid:]
        merge_sort(left_half)
        merge_sort(right_half)

        i = j = k = 0
        while i < len(left_half) and j < len(right_half):
            if left_half[i] < right_half[j]:
                my_list[k] = left_half[i]
                i += 1
            else:
                my_list[k] = right_half[j]
                j += 1
            k += 1

        while i < len(left_half):
            my_list[k] = left_half[i]
            i += 1
            k += 1

        while j < len(right_half):
            my_list[k] = right_half[j]
            j += 1
            k += 1
# quicksort (Hoare's partition) | O(n²)

def partition(my_list, first_index, last_index):
  pivot = my_list[first_index]
  left_pointer = first_index + 1
  right_pointer = last_index
 
  while True:
    while my_list[left_pointer] < pivot and left_pointer < last_index:
      left_pointer += 1
    while my_list[right_pointer] > pivot and right_pointer >= first_index:
      right_pointer -= 1 
    if left_pointer >= right_pointer:
        break
    my_list[left_pointer], my_list[right_pointer] = my_list[right_pointer], my_list[left_pointer]
  
  my_list[first_index], my_list[right_pointer] = my_list[right_pointer], my_list[first_index]
  return right_pointer

def quicksort(my_list, first_index=0, last_index=None):
  if last_index == None:
    last_index = len(my_list) - 1
  if first_index < last_index:
    partition_index = partition(my_list, first_index, last_index)
    quicksort(my_list, first_index, partition_index - 1)
    quicksort(my_list, partition_index + 1, last_index)

Compare runtime

def evaluate_search_algorithm(algorithm):
    sizes = [100, 500, 1000, 2000, 5000, 10000]
    times = []
    for size in sizes:
        # generate data
        data = list(range(size))
        times.append(
            timeit.timeit(  # output in seconds
                stmt=lambda: algorithm(data, random.randrange(size)),  # always search for different integer
                number=1
            )
        )
    return times


def evaluate_sort_algorithms(algorithm):
    sizes = [5, 10, 20, 50, 75, 100]
    times = []
    for size in sizes:
        # generate data
        data = [random.randint(1,1000) for _ in range(size)]
        times.append(
            timeit.timeit(
                stmt=lambda: algorithm(data),
                number=1
            )
        )
    return times
runtime_search, runtime_sort = {}, {}

for algorithm in [linear_search, binary_search_recursive]:
    runtime_search[algorithm.__name__] = evaluate_search_algorithm(algorithm)
for algorithm in [bubble_sort, selection_sort, insertion_sort, merge_sort, quicksort]:
    runtime_sort[algorithm.__name__] = evaluate_sort_algorithms(algorithm)
fig, axs = plt.subplots(1, 2, figsize=(7,4), layout="constrained")
for alg, time in runtime_search.items():
    axs[0].plot([100, 500, 1000, 2000, 5000, 10000], [t*1e3 for t in time], marker='.', label=alg)
for alg, time in runtime_sort.items():
    axs[1].plot([5, 10, 20, 50, 75, 100], [t*1e3 for t in time], marker='.', label=alg)

titles = ["search", "sort"]
for ax, title in zip(axs, titles):
    ax.legend()
    ax.set_xlabel("list length")
    ax.set_ylabel("runtime [ms]")
    ax.set_title(title)
    ax.set_ylim(bottom=0)
    ax.set_xlim(left=0)

png

To display this jupyter notebook here, I used nbconvert to export it directly as markdown.