Skip to content

Commit 9753f32

Browse files
authored
Delay import of sklearn and ijson (#40)
* delay import of sklearn ijson * ruff * remove rstcheck * remove circleci * complex * fix documentation
1 parent 0fda7bf commit 9753f32

File tree

11 files changed

Lines changed: 16 additions & 134 deletions

11 files changed

Lines changed: 16 additions & 134 deletions

.circleci/config.yml

Lines changed: 0 additions & 70 deletions
This file was deleted.

.github/workflows/documentation.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ jobs:
7777
grep ERROR doc.txt
7878
exit 1
7979
fi
80-
if [[ $(grep WARNING doc.txt) ]]; then
80+
if [[ $(grep WARNING doc.txt | grep -v 'std:term:y') ]]; then
8181
echo "Documentation produces warnings."
8282
grep WARNING doc.txt
8383
exit 1

.github/workflows/rstcheck.yml

Lines changed: 0 additions & 27 deletions
This file was deleted.

azure-pipelines.yml

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
- script: pip install -r requirements-dev.txt
2525
displayName: 'Install Requirements dev'
2626
- script: |
27-
ruff .
27+
ruff check .
2828
displayName: 'Ruff'
2929
- script: |
3030
black --diff .
@@ -76,11 +76,8 @@ jobs:
7676
- script: pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
7777
displayName: 'Install scikit-learn nightly'
7878
- script: |
79-
ruff .
79+
ruff check .
8080
displayName: 'Ruff'
81-
- script: |
82-
rstcheck -r ./_doc ./pandas_streaming
83-
displayName: 'rstcheck'
8481
- script: |
8582
black --diff .
8683
displayName: 'Black'
@@ -117,11 +114,8 @@ jobs:
117114
- script: pip install -r requirements-dev.txt
118115
displayName: 'Install Requirements dev'
119116
- script: |
120-
ruff .
117+
ruff check .
121118
displayName: 'Ruff'
122-
- script: |
123-
rstcheck -r ./_doc ./pandas_streaming
124-
displayName: 'rstcheck'
125119
- script: |
126120
black --diff .
127121
displayName: 'Black'

pandas_streaming/df/connex_split.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from logging import getLogger
33
import pandas
44
import numpy
5-
from sklearn.model_selection import train_test_split
65
from .dataframe_helpers import dataframe_shuffle
76

87
logger = getLogger("pandas-streaming")
@@ -61,6 +60,8 @@ def train_test_split_weights(
6160
raise ValueError(
6261
f"test_size={test_size} or train_size={train_size} cannot be null (1)."
6362
)
63+
from sklearn.model_selection import train_test_split
64+
6465
return train_test_split(
6566
df, test_size=test_size, train_size=train_size, random_state=random_state
6667
)

pandas_streaming/df/dataframe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -640,10 +640,10 @@ def _reservoir_sampling(
640640
if len(indices) < n:
641641
indices.append((i, ir))
642642
else:
643-
x = nrandom.random() # pylint: disable=E1101
643+
x = nrandom.random()
644644
if x * n < (seen - n):
645645
k = nrandom.randint(0, len(indices) - 1)
646-
indices[k] = (i, ir) # pylint: disable=E1126
646+
indices[k] = (i, ir)
647647
indices = set(indices)
648648

649649
def reservoir_iterate(sdf, indices, chunksize):

pandas_streaming/df/dataframe_helpers.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,9 @@ def numpy_types():
2525
numpy.uint16,
2626
numpy.uint32,
2727
numpy.uint64,
28-
numpy.float_,
2928
numpy.float16,
3029
numpy.float32,
3130
numpy.float64,
32-
numpy.complex_,
3331
numpy.complex64,
3432
numpy.complex128,
3533
]
@@ -155,13 +153,13 @@ def hash_floatl(c):
155153
} # pylint: disable=R1721
156154
for c in cols:
157155
t = coltype[c]
158-
if t == int:
156+
if t == int: # noqa: E721
159157
df[c] = df[c].apply(hash_intl)
160158
elif t == numpy.int64:
161159
df[c] = df[c].apply(lambda x: numpy.int64(hash_intl(x)))
162-
elif t == float:
160+
elif t == float: # noqa: E721
163161
df[c] = df[c].apply(hash_floatl)
164-
elif t == object:
162+
elif t == object: # noqa: E721
165163
df[c] = df[c].apply(hash_strl)
166164
else:
167165
raise NotImplementedError( # pragma: no cover

pandas_streaming/df/dataframe_io_helpers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from ujson import dumps
66
except ImportError: # pragma: no cover
77
from json import dumps
8-
import ijson
98

109

1110
class JsonPerRowsStream:
@@ -257,6 +256,8 @@ def enumerate_json_items(
257256
else:
258257
if hasattr(filename, "seek"):
259258
filename.seek(0)
259+
import ijson
260+
260261
parser = ijson.parse(filename)
261262
current = None
262263
curkey = None

pandas_streaming/df/dataframe_split.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def sklearn_train_test_split(
4545
)
4646
with warnings.catch_warnings():
4747
warnings.filterwarnings("ignore", category=ImportWarning)
48-
from sklearn.model_selection import train_test_split # pylint: disable=C0415
48+
from sklearn.model_selection import train_test_split
4949

5050
opts = ["test_size", "train_size", "random_state", "shuffle", "stratify"]
5151
split_ops = {}

pyproject.toml

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,3 @@
1-
[tool.rstcheck]
2-
report_level = "INFO"
3-
ignore_directives = [
4-
"autoclass",
5-
"autofunction",
6-
"automodule",
7-
"exreflist",
8-
"gdot",
9-
"image-sg",
10-
"pr",
11-
"runpython",
12-
]
13-
ignore_roles = ["epkg"]
14-
151
[tool.ruff]
162

173
# Exclude a variety of commonly ignored directories.
@@ -25,11 +11,11 @@ exclude = [
2511
# Same as Black.
2612
line-length = 88
2713

28-
[tool.ruff.mccabe]
14+
[tool.ruff.lint.mccabe]
2915
# Unlike Flake8, default to a complexity level of 10.
3016
max-complexity = 10
3117

32-
[tool.ruff.per-file-ignores]
18+
[tool.ruff.lint.per-file-ignores]
3319
"_doc/examples/plot_first_example.py" = ["E402", "F811"]
3420
"_unittests/ut_df/test_dataframe_io_helpers.py" = ["E501"]
3521
"pandas_streaming/data/__init__.py" = ["F401"]

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy