python · rhettinger · May 8, 2020 · May 6, 2020 · May 6, 2020 · May 6, 2020
diff --git a/Doc/library/random.rst b/Doc/library/random.rst
@@ -217,7 +217,7 @@ Functions for sequences
       The optional parameter *random*.
 
 
-.. function:: sample(population, k)
+.. function:: sample(population, k, *, weights=None)
 
    Return a *k* length list of unique elements chosen from the population sequence
    or set. Used for random sampling without replacement.
@@ -231,13 +231,20 @@ Functions for sequences
    Members of the population need not be :term:`hashable` or unique.  If the population
    contains repeats, then each occurrence is a possible selection in the sample.
 
+   If *weights* are given, they must be non-negative integer counts.
+   Each selection effectively reduces the count by one, lowering
+   the probability for the next selection.
+
    To choose a sample from a range of integers, use a :func:`range` object as an
    argument.  This is especially fast and space efficient for sampling from a large
    population:  ``sample(range(10000000), k=60)``.
 
    If the sample size is larger than the population size, a :exc:`ValueError`
    is raised.
 
+   .. versionchanged:: 3.9
+      Added the *weights* parameter.
+
    .. deprecated:: 3.9
       In the future, the *population* must be a sequence.  Instances of
       :class:`set` are no longer supported.  The set must first be converted
@@ -420,12 +427,11 @@ Simulations::
    >>> choices(['red', 'black', 'green'], [18, 18, 2], k=6)
    ['red', 'green', 'black', 'black', 'red', 'black']
 
-   >>> # Deal 20 cards without replacement from a deck of 52 playing cards
-   >>> # and determine the proportion of cards with a ten-value
-   >>> # (a ten, jack, queen, or king).
-   >>> deck = collections.Counter(tens=16, low_cards=36)
-   >>> seen = sample(list(deck.elements()), k=20)
-   >>> seen.count('tens') / 20
+   >>> # Deal 20 cards without replacement from a deck
+   >>> # of 52 playing cards, and determine the proportion of cards
+   >>> # with a ten-value:  ten, jack, queen, or king.
+   >>> dealt = sample(['tens', 'low cards'], weights=[16, 36], k=20)
+   >>> dealt.count('tens') / 20
    0.15
 
    >>> # Estimate the probability of getting 5 or more heads from 7 spins

diff --git a/Lib/random.py b/Lib/random.py
@@ -331,7 +331,7 @@ def shuffle(self, x, random=None):
                 j = _int(random() * (i+1))
                 x[i], x[j] = x[j], x[i]
 
-    def sample(self, population, k):
+    def sample(self, population, k, *, weights=None):
         """Chooses k unique random elements from a population sequence or set.
 
         Returns a new list containing elements from the population while
@@ -340,6 +340,10 @@ def sample(self, population, k):
         samples.  This allows raffle winners (the sample) to be partitioned
         into grand prize and second place winners (the subslices).
 
+        If weights are given, they must be non-negative integer counts.
+        Each selection effectively reduces the count by one, lowering
+        the probability for the next selection.
+
         Members of the population need not be hashable or unique.  If the
         population contains repeats, then each occurrence is a possible
         selection in the sample.
@@ -379,8 +383,20 @@ def sample(self, population, k):
             population = tuple(population)
         if not isinstance(population, _Sequence):
             raise TypeError("Population must be a sequence.  For dicts or sets, use sorted(d).")
-        randbelow = self._randbelow
         n = len(population)
+        if weights is not None:
+            cum_weights = list(_accumulate(weights))
+            if len(cum_weights) != n:
+                raise ValueError('The number of weights does not match the population')
+            total = cum_weights.pop()
+            if not isinstance(total, int):
+                raise TypeError('Weights must be integers')
+            if total < 0:
+                raise ValueError('Total of weights must be greater than zero')
+            selections = sample(range(total), k=k)
+            bisect = _bisect
+            return [population[bisect(cum_weights, s)] for s in selections]
+        randbelow = self._randbelow
         if not 0 <= k <= n:
             raise ValueError("Sample larger than population or is negative")
         result = [None] * k

diff --git a/Lib/test/test_random.py b/Lib/test/test_random.py
@@ -9,7 +9,7 @@
 from math import log, exp, pi, fsum, sin, factorial
 from test import support
 from fractions import Fraction
-
+from collections import Counter
 
 class TestBasicOps:
     # Superclass with tests common to all generators.
@@ -161,6 +161,43 @@ def test_sample_on_sets(self):
             population = {10, 20, 30, 40, 50, 60, 70}
             self.gen.sample(population, k=5)
 
+    def test_sample_with_weights(self):
+        sample = self.gen.sample
+
+        # General case
+        colors =  ['red', 'green', 'blue', 'orange', 'black', 'brown', 'amber']
+        weights = [500,      200,     20,       10,       5,       0,       1 ]
+        k = 700
+        summary = Counter(sample(colors, weights=weights, k=k))
+        self.assertEqual(sum(summary.values()), k)
+        for color, weight in zip(colors, weights):
+            self.assertLessEqual(summary[color], weight)
+        self.assertNotIn('brown', summary)
+
+        # Case that exhausts the population
+        k = sum(weights)
+        summary = Counter(sample(colors, weights=weights, k=k))
+        self.assertEqual(sum(summary.values()), k)
+        for color, weight in zip(colors, weights):
+            self.assertLessEqual(summary[color], weight)
+        self.assertNotIn('brown', summary)
+
+        # Case with population size of 1
+        summary = Counter(sample(['x'], weights=[10], k=8))
+        self.assertEqual(summary, Counter(x=8))
+
+        # Test error handling
+        with self.assertRaises(TypeError):
+            sample(['red', 'green', 'blue'], weights=10, k=10)               # weights not iterable
+        with self.assertRaises(ValueError):
+            sample(['red', 'green', 'blue'], weights=[-3, -7, -8], k=2)      # weights are negative
+        with self.assertRaises(ValueError):
+            sample(['red', 'green'], weights=[10, 10], k=21)                 # population too small
+        with self.assertRaises(ValueError):
+            sample(['red', 'green', 'blue'], weights=[1, 2], k=2)            # too few weights
+        with self.assertRaises(ValueError):
+            sample(['red', 'green', 'blue'], weights=[1, 2, 3, 4], k=2)      # too many weights
+
     def test_choices(self):
         choices = self.gen.choices
         data = ['red', 'green', 'blue', 'yellow']

diff --git a/Misc/NEWS.d/next/Library/2020-05-06-15-36-47.bpo-40541.LlYghL.rst b/Misc/NEWS.d/next/Library/2020-05-06-15-36-47.bpo-40541.LlYghL.rst
@@ -0,0 +1 @@
+Added an optional *weights* parameter to random.sample().
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added an optional weights parameter to random.sample().