diff --git a/Lib/statistics.py b/Lib/statistics.py index 7d5d750193a5ab..6bd214bbfe2ff5 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -1036,7 +1036,7 @@ def covariance(x, y, /): raise StatisticsError('covariance requires at least two data points') xbar = fsum(x) / n ybar = fsum(y) / n - sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y)) + sxy = sumprod((xi - xbar for xi in x), (yi - ybar for yi in y)) return sxy / (n - 1) @@ -1074,11 +1074,14 @@ def correlation(x, y, /, *, method='linear'): start = (n - 1) / -2 # Center rankings around zero x = _rank(x, start=start) y = _rank(y, start=start) - xbar = fsum(x) / n - ybar = fsum(y) / n - sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y)) - sxx = fsum((d := xi - xbar) * d for xi in x) - syy = fsum((d := yi - ybar) * d for yi in y) + else: + xbar = fsum(x) / n + ybar = fsum(y) / n + x = [xi - xbar for xi in x] + y = [yi - ybar for yi in y] + sxy = sumprod(x, y) + sxx = sumprod(x, x) + syy = sumprod(y, y) try: return sxy / sqrt(sxx * syy) except ZeroDivisionError: @@ -1131,14 +1134,13 @@ def linear_regression(x, y, /, *, proportional=False): raise StatisticsError('linear regression requires that both inputs have same number of data points') if n < 2: raise StatisticsError('linear regression requires at least two data points') - if proportional: - sxy = fsum(xi * yi for xi, yi in zip(x, y)) - sxx = fsum(xi * xi for xi in x) - else: + if not proportional: xbar = fsum(x) / n ybar = fsum(y) / n - sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y)) - sxx = fsum((d := xi - xbar) * d for xi in x) + x = [xi - xbar for xi in x] # List because used three times below + y = (yi - ybar for yi in y) # Generator because only used once below + sxy = sumprod(x, y) + 0.0 # Add zero to coerce result to a float + sxx = sumprod(x, x) try: slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x) except ZeroDivisionError: diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 31a3cb6b53a6f2..f0fa6454b1f91a 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -1,4 +1,4 @@ -"""Test suite for statistics module, including helper NumericTestCase and +x = """Test suite for statistics module, including helper NumericTestCase and approx_equal function. """ @@ -2610,6 +2610,16 @@ def test_proportional(self): self.assertAlmostEqual(slope, 20 + 1/150) self.assertEqual(intercept, 0.0) + def test_float_output(self): + x = [Fraction(2, 3), Fraction(3, 4)] + y = [Fraction(4, 5), Fraction(5, 6)] + slope, intercept = statistics.linear_regression(x, y) + self.assertTrue(isinstance(slope, float)) + self.assertTrue(isinstance(intercept, float)) + slope, intercept = statistics.linear_regression(x, y, proportional=True) + self.assertTrue(isinstance(slope, float)) + self.assertTrue(isinstance(intercept, float)) + class TestNormalDist: # General note on precision: The pdf(), cdf(), and overlap() methods diff --git a/Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst b/Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst new file mode 100644 index 00000000000000..3de09f86754f3e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-13-18-27-00.gh-issue-102670.GyoThv.rst @@ -0,0 +1,2 @@ +Optimized fmean(), correlation(), covariance(), and linear_regression() +using the new math.sumprod() function.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: