aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/reservoir_sampling/__init__.py
blob: 4ee46ee5e1177f758e32884968696cda314cc5e2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import random


def reservoir_sampling(data, nsamples, prng=None):
    """Pick up to ``nsamples`` entries from ``data`` in a single pass.

    The first ``nsamples`` entries are kept as-is.  For every later entry at
    zero-based position ``i``, an integer is drawn with
    ``prng.randint(0, i)``; if it falls below ``nsamples`` the entry
    overwrites the reservoir slot with that index, otherwise it is dropped.

    Args:
        data: Any iterable; it is consumed exactly once, so generators work.
        nsamples: Size of the reservoir.
        prng: Object exposing ``randint(a, b)``.  When ``None``, the
            module-level ``random`` functions are used.  The selection is
            uniform as long as ``randint`` is uniform over the inclusive
            range ``[a, b]``, as ``random.randint`` is.

    Returns:
        A new list holding the sampled entries.  It is shorter than
        ``nsamples`` when ``data`` yields fewer entries than that.
    """
    rng = random if prng is None else prng

    reservoir = []
    for position, item in enumerate(data):
        # Fill phase: the reservoir is not full yet, keep everything.
        if position < nsamples:
            reservoir.append(item)
            continue

        # Replacement phase: inclusive upper bound gives position + 1
        # equally likely outcomes, nsamples of which hit a reservoir slot.
        slot = rng.randint(0, position)
        if slot < nsamples:
            reservoir[slot] = item
    return reservoir