diff --git a/README.md b/README.md index 6278c37..6244340 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,7 @@ By using this template, your data science project is auto-generated as follows: ``` . |-- notebooks # A directory to place all notebooks files. -| |-- *.ipynb -| `-- ipython_config.py # IPython magic to let *.ipynb treat src/ as PYTHONPATH +| `-- *.ipynb |-- setup.py # To pip install your Python module (if module name specified to cookiecutter) |-- src | |-- my_custom_module # Your custom module @@ -52,7 +51,6 @@ By using this template, your data science project is auto-generated as follows: |-- .vscenv # Sample dot env with PYTHONPATH config (for IDE /editor that support this) |-- LICENSE # Boilperplate (auto-generated content based on what specified to cookiecutter) |-- README.md # Template for you to customize -|-- ipython_config.py # Sample copy of ipython_config.py (same as notebook/ipython_config.py) |-- pyproject.toml # Sample setting for Python code formatter `-- tox.ini # Sample configurations for Python toolchains ``` diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py index 69b478f..4916e98 100644 --- a/hooks/post_gen_project.py +++ b/hooks/post_gen_project.py @@ -41,7 +41,7 @@ def rm(s: Path) -> None: "#", "# Recommended next steps:", f"# - cd {cwd}", - "# - git init (this is needed for the ipython_config.py magic to work)", + "# - git init", "# - pre-commit autoupdate", "# - pre-commit install", "# - review README.md", diff --git a/{{cookiecutter.repo_name}}/.pre-commit-config.yaml b/{{cookiecutter.repo_name}}/.pre-commit-config.yaml index 2d65eec..162ddd1 100644 --- a/{{cookiecutter.repo_name}}/.pre-commit-config.yaml +++ b/{{cookiecutter.repo_name}}/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: check-json - id: check-merge-conflict @@ -13,16 +13,27 @@ repos: - id: detect-private-key - id: end-of-file-fixer - id: 
trailing-whitespace +- repo: https://github.com/asottile/pyupgrade + rev: v2.29.1 + hooks: + - id: pyupgrade - repo: https://github.com/myint/autoflake rev: v1.4 hooks: - id: autoflake args: [--ignore-init-module-imports, --in-place] - repo: https://github.com/timothycrosley/isort - rev: 5.8.0 + rev: 5.10.1 hooks: - id: isort - repo: https://github.com/psf/black - rev: 20.8b1 + rev: 21.12b0 hooks: - id: black +- repo: https://github.com/nbQA-dev/nbQA + rev: 1.2.2 + hooks: + - id: nbqa-pyupgrade + args: [--py36-plus] + - id: nbqa-isort + - id: nbqa-black diff --git a/{{cookiecutter.repo_name}}/README.md b/{{cookiecutter.repo_name}}/README.md index cdc5fd9..92708f6 100644 --- a/{{cookiecutter.repo_name}}/README.md +++ b/{{cookiecutter.repo_name}}/README.md @@ -8,8 +8,7 @@ {{cookiecutter.repo_name}} |-- bin # CLI scripts |-- notebooks -| |-- *.ipynb # Jupyter notebooks -| `-- ipython_config.py # IPython magic to let *.ipynb treat src/ as PYTHONPATH +| `-- *.ipynb # Jupyter notebooks {% if cookiecutter.package_name != "" -%} |-- setup.py # To install {{cookiecutter.repo_name}} as a Python module {% endif -%} @@ -27,7 +26,6 @@ |-- .vscenv # Dot env with PYTHONPATH config (for IDE /editor that support this) |-- LICENSE # License |-- README.md # Template document -|-- ipython_config.py # A copy of ipython_config.py (same as notebook/ipython_config.py) |-- pyproject.toml # Setting for Python code formatter `-- tox.ini # Settings for select Python toolchains ``` diff --git a/{{cookiecutter.repo_name}}/bin/README.md b/{{cookiecutter.repo_name}}/bin/README.md new file mode 100644 index 0000000..8169049 --- /dev/null +++ b/{{cookiecutter.repo_name}}/bin/README.md @@ -0,0 +1,3 @@ +# CLI scripts + +This folder holds command line interface (CLI) scripts. These scripts typically provide entry points to kick off common tasks in your data science project, such as model training or inference. 
diff --git a/{{cookiecutter.repo_name}}/notebooks/ipython_config.py b/{{cookiecutter.repo_name}}/notebooks/ipython_config.py deleted file mode 100644 index d97c322..0000000 --- a/{{cookiecutter.repo_name}}/notebooks/ipython_config.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import subprocess -from pathlib import Path -from typing import Union - -#################################################################################################### -# Additional PYTHONPATH to allow notebooks to import custom modules at a few pre-defined places. - - -def sys_path_append(o: Union[str, os.PathLike]) -> str: - posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix() - return 'sys.path.insert(0, "{}")'.format(posix_path) - - -_pythonpath = [ - "import sys, os", - sys_path_append(os.getcwd()), -] - -# Add GIT_ROOT/ and a few other subdirs -try: - _p = subprocess.run( - ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - - if _p.returncode == 0: - _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n' - _git_root_p: Path = Path(_git_root) - _pythonpath += [ - sys_path_append(_git_root_p), # GIT_ROOT - sys_path_append(_git_root_p / "src"), # GIT_ROOT/src - sys_path_append(_git_root_p / "notebooks"), # GIT_ROOT/notebooks - ] -except: # noqa: E722 - pass - -c.InteractiveShellApp.exec_lines = _pythonpath # type: ignore # noqa: F821 -#################################################################################################### diff --git a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb index 8cc5151..11d7c3c 100644 --- a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb +++ b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb @@ -32,11 +32,11 @@ "import ndpretty\n", "import numpy as np\n", "import pandas as pd\n", + "import sagemaker as sm\n", "from IPython.display import Markdown\n", "from smallmatter.ds import mask_df # See: 
https://github.com/aws-samples/smallmatter-package/\n", "\n", "# A few standard SageMaker's stanzas. Use type annotation to be verbose.\n", - "import sagemaker as sm\n", "role: str = sm.get_execution_role()\n", "sess = sm.Session()\n", "region: str = sess.boto_session.region_name" @@ -71,15 +71,15 @@ "####################################################################################################\n", "# Change me\n", "####################################################################################################\n", - "bucket_name = 'my-bucket-name'\n", - "prefix_name = 'some/prefix'\n", + "bucket_name = \"my-bucket-name\"\n", + "prefix_name = \"some/prefix\"\n", "####################################################################################################\n", "\n", "\n", "####################################################################################################\n", "# Do not change the next lines, as they're derived and will be recomputed automatically.\n", "####################################################################################################\n", - "s3_prefix = f's3://{bucket_name}/{prefix_name}'.rstrip('/')\n", + "s3_prefix = f\"s3://{bucket_name}/{prefix_name}\".rstrip(\"/\")\n", "\n", "# Synchronize Python variable and environment variable.\n", "%set_env S3_PREFIX=$s3_prefix\n", @@ -299,20 +299,28 @@ ], "source": [ "def mask_userid(df: pd.DataFrame) -> pd.DataFrame:\n", - " return mask_df(df, cols=['userid'])\n", + " return mask_df(df, cols=[\"userid\"])\n", "\n", - "df_a = pd.DataFrame({'a': [1,2,3], 'b': [4,5,6]})\n", - "df_b = pd.DataFrame({\n", - " 'userid': [1000,2000,3000],\n", - " 'pca_a': [0.1, 0.2, 0.3],\n", - " 'pca_b': [-0.3, 0.01, 0.7]\n", - " })\n", + "\n", + "df_a = pd.DataFrame(\n", + " {\n", + " \"a\": [1, 2, 3],\n", + " \"b\": [4, 5, 6],\n", + " }\n", + ")\n", + "df_b = pd.DataFrame(\n", + " {\n", + " \"userid\": [1000, 2000, 3000],\n", + " \"pca_a\": [0.1, 0.2, 0.3],\n", + " \"pca_b\": [-0.3, 0.01, 
0.7],\n", + " }\n", + ")\n", "\n", "display(\n", " Markdown('## Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"'),\n", " df_a,\n", - "\n", - " Markdown('''## Masked dataframe\n", + " Markdown(\n", + " \"\"\"## Masked dataframe\n", "Sometime, we would like to version the output of this cell into the git repo, to help readers to\n", "quickly see the shape of a dataframe.\n", "\n", @@ -320,7 +328,7 @@ "**NEVER** version these values to git.\n", "Otherwise, as you all know, once checked into the git history, it can be tedious and challenging to\n", "undo the versioning.\n", - "'''\n", + "\"\"\"\n", " ),\n", " mask_userid(df_b),\n", ")" @@ -343,7 +351,7 @@ "source": [ "# Affect globally\n", "ndpretty.default()\n", - "np.random.rand(9,9)\n", + "np.random.rand(9, 9)\n", "\n", "# NOTE: without ndpretty.default(), use this form:\n", "# ndpretty.ndarray_html(np.random.rand(3, 4))\n",
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: