aboutsummaryrefslogtreecommitdiffstats
path: root/library/python/reservoir_sampling/__init__.py
blob: 7fc579feb43ec3b897867842ca6fafd372031aa2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import random 
 
 
def reservoir_sampling(data, nsamples, prng=None):
    """Pick up to ``nsamples`` entries from ``data`` in a single pass.

    The first ``nsamples`` entries are stored as they arrive. Every later
    entry, at zero-based position ``i``, draws ``prng.randint(0, i)`` and
    overwrites the stored entry at that slot when the draw is below
    ``nsamples``. Only the reservoir is kept in memory, so ``data`` may be
    any iterable, including a one-shot iterator.

    Args:
        data: Iterable of entries to sample from; it is consumed fully.
        nsamples: Maximum number of entries to return.
        prng: Object providing ``randint(a, b)``. When ``None``, the
            ``random`` module is used. The selection is uniform provided
            ``randint`` is uniform over the inclusive range ``[a, b]``,
            as ``random.randint`` is.

    Returns:
        A list holding ``min(nsamples, number of entries)`` entries. When
        ``data`` has at most ``nsamples`` entries they are returned in
        their original order and ``prng`` is never called.
    """
    rng = random if prng is None else prng

    reservoir = []
    for index, item in enumerate(data):
        # Fill phase: the reservoir is not full yet, keep everything.
        if index < nsamples:
            reservoir.append(item)
            continue

        # Replacement phase: one draw per entry, inclusive of ``index``,
        # so the new entry lands in the reservoir with probability
        # nsamples / (index + 1).
        slot = rng.randint(0, index)
        if slot < nsamples:
            reservoir[slot] = item
    return reservoir