Skip to content

Commit 93f5ea2

Browse files
authored
refactor: have a single function for normalized PyPI package names (bazel-contrib#1329)
Before this PR there were at least 2 places where such a helper function existed and it made it very easy to make another copy. This PR introduces a hardened version, that follows conventions from upstream PyPI and tests have been added. Split from bazel-contrib#1294, work towards bazel-contrib#1262.
1 parent 5c5ab5b commit 93f5ea2

File tree

6 files changed

+124
-26
lines changed

6 files changed

+124
-26
lines changed

python/extensions/pip.bzl

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ load(
2626
"whl_library",
2727
)
2828
load("@rules_python//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
29+
load("//python/private:normalize_name.bzl", "normalize_name")
2930

3031
def _whl_mods_impl(mctx):
3132
"""Implementation of the pip.whl_mods tag class.
@@ -130,7 +131,7 @@ def _create_versioned_pip_and_whl_repos(module_ctx, pip_attr, whl_map):
130131
# would need to guess what name we modified the whl name
131132
# to.
132133
annotation = whl_modifications.get(whl_name)
133-
whl_name = _sanitize_name(whl_name)
134+
whl_name = normalize_name(whl_name)
134135
whl_library(
135136
name = "%s_%s" % (pip_name, whl_name),
136137
requirement = requirement_line,
@@ -318,10 +319,6 @@ def _pip_impl(module_ctx):
318319
whl_library_alias_names = whl_map.keys(),
319320
)
320321

321-
# Keep in sync with python/pip_install/tools/bazel.py
322-
def _sanitize_name(name):
323-
return name.replace("-", "_").replace(".", "_").lower()
324-
325322
def _pip_parse_ext_attrs():
326323
attrs = dict({
327324
"hub_name": attr.string(

python/pip_install/pip_repository.bzl

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ load("//python/pip_install:repositories.bzl", "all_requirements")
2020
load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
2121
load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
2222
load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
23+
load("//python/private:normalize_name.bzl", "normalize_name")
2324
load("//python/private:toolchains_repo.bzl", "get_host_os_arch")
2425

2526
CPPFLAGS = "CPPFLAGS"
@@ -267,10 +268,6 @@ A requirements_lock attribute must be specified, or a platform-specific lockfile
267268
""")
268269
return requirements_txt
269270

270-
# Keep in sync with `_clean_pkg_name` in generated bzlmod requirements.bzl
271-
def _clean_pkg_name(name):
272-
return name.replace("-", "_").replace(".", "_").lower()
273-
274271
def _pkg_aliases(rctx, repo_name, bzl_packages):
275272
"""Create alias declarations for each python dependency.
276273
@@ -376,7 +373,7 @@ def _pip_repository_bzlmod_impl(rctx):
376373
content = rctx.read(requirements_txt)
377374
parsed_requirements_txt = parse_requirements(content)
378375

379-
packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
376+
packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
380377

381378
bzl_packages = sorted([name for name, _ in packages])
382379
_create_pip_repository_bzlmod(rctx, bzl_packages, str(requirements_txt))
@@ -422,7 +419,7 @@ def _pip_repository_impl(rctx):
422419
content = rctx.read(requirements_txt)
423420
parsed_requirements_txt = parse_requirements(content)
424421

425-
packages = [(_clean_pkg_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
422+
packages = [(normalize_name(name), requirement) for name, requirement in parsed_requirements_txt.requirements]
426423

427424
bzl_packages = sorted([name for name, _ in packages])
428425

@@ -432,7 +429,7 @@ def _pip_repository_impl(rctx):
432429

433430
annotations = {}
434431
for pkg, annotation in rctx.attr.annotations.items():
435-
filename = "{}.annotation.json".format(_clean_pkg_name(pkg))
432+
filename = "{}.annotation.json".format(normalize_name(pkg))
436433
rctx.file(filename, json.encode_indent(json.decode(annotation)))
437434
annotations[pkg] = "@{name}//:{filename}".format(name = rctx.attr.name, filename = filename)
438435

python/pip_install/tools/lib/bazel.py

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import re
16+
1517
WHEEL_FILE_LABEL = "whl"
1618
PY_LIBRARY_LABEL = "pkg"
1719
DATA_LABEL = "data"
@@ -22,21 +24,9 @@
2224
def sanitise_name(name: str, prefix: str) -> str:
2325
"""Sanitises the name to be compatible with Bazel labels.
2426
25-
There are certain requirements around Bazel labels that we need to consider. From the Bazel docs:
26-
27-
Package names must be composed entirely of characters drawn from the set A-Z, a–z, 0–9, '/', '-', '.', and '_',
28-
and cannot start with a slash.
29-
30-
Due to restrictions on Bazel labels we also cannot allow hyphens. See
31-
https://github.com/bazelbuild/bazel/issues/6841
32-
33-
Further, rules-python automatically adds the repository root to the PYTHONPATH, meaning a package that has the same
34-
name as a module is picked up. We workaround this by prefixing with `pypi__`. Alternatively we could require
35-
`--noexperimental_python_import_all_repositories` be set, however this breaks rules_docker.
36-
See: https://github.com/bazelbuild/bazel/issues/2636
27+
See the doc in ../../../private/normalize_name.bzl.
3728
"""
38-
39-
return prefix + name.replace("-", "_").replace(".", "_").lower()
29+
return prefix + re.sub(r"[-_.]+", "_", name).lower()
4030

4131

4232
def _whl_name_to_repo_root(whl_name: str, repo_prefix: str) -> str:

python/private/normalize_name.bzl

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2023 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Normalize a PyPI package name to allow consistent label names
17+
18+
Note we chose `_` instead of `-` as a separator as there are certain
19+
requirements around Bazel labels that we need to consider.
20+
21+
From the Bazel docs:
22+
> Package names must be composed entirely of characters drawn from the set
23+
> A-Z, a–z, 0–9, '/', '-', '.', and '_', and cannot start with a slash.
24+
25+
However, due to restrictions on Bazel labels we also cannot allow hyphens.
26+
See https://github.com/bazelbuild/bazel/issues/6841
27+
28+
Further, rules_python automatically adds the repository root to the
29+
PYTHONPATH, meaning a package that has the same name as a module is picked
30+
up. We workaround this by prefixing with `<hub_name>_`.
31+
32+
Alternatively we could require
33+
`--noexperimental_python_import_all_repositories` be set, however this
34+
breaks rules_docker.
35+
See: https://github.com/bazelbuild/bazel/issues/2636
36+
37+
Also see Python spec on normalizing package names:
38+
https://packaging.python.org/en/latest/specifications/name-normalization/
39+
"""
40+
41+
# Keep in sync with ../pip_install/tools/lib/bazel.py
42+
def normalize_name(name):
43+
"""normalize a PyPI package name and return a valid bazel label.
44+
45+
Args:
46+
name: str, the PyPI package name.
47+
48+
Returns:
49+
a normalized name as a string.
50+
"""
51+
name = name.replace("-", "_").replace(".", "_").lower()
52+
if "__" not in name:
53+
return name
54+
55+
# Handle the edge-case where there are consecutive `-`, `_` or `.` characters,
56+
# which is a valid Python package name.
57+
return "_".join([
58+
part
59+
for part in name.split("_")
60+
if part
61+
])
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
load(":normalize_name_tests.bzl", "normalize_name_test_suite")
2+
3+
normalize_name_test_suite(name = "normalize_name_tests")
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2023 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
""
16+
17+
load("@rules_testing//lib:test_suite.bzl", "test_suite")
18+
load("//python/private:normalize_name.bzl", "normalize_name") # buildifier: disable=bzl-visibility
19+
20+
_tests = []
21+
22+
def _test_name_normalization(env):
23+
want = {
24+
input: "friendly_bard"
25+
for input in [
26+
"friendly-bard",
27+
"Friendly-Bard",
28+
"FRIENDLY-BARD",
29+
"friendly.bard",
30+
"friendly_bard",
31+
"friendly--bard",
32+
"FrIeNdLy-._.-bArD",
33+
]
34+
}
35+
36+
actual = {
37+
input: normalize_name(input)
38+
for input in want.keys()
39+
}
40+
env.expect.that_dict(actual).contains_exactly(want)
41+
42+
_tests.append(_test_name_normalization)
43+
44+
def normalize_name_test_suite(name):
45+
"""Create the test suite.
46+
47+
Args:
48+
name: the name of the test suite
49+
"""
50+
test_suite(name = name, basic_tests = _tests)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy