diff --git a/README.md b/README.md
index 6244340..789c0b4 100644
--- a/README.md
+++ b/README.md
@@ -36,10 +36,12 @@ By using this template, your data science project is auto-generated as follows:
```
.
|-- notebooks # A directory to place all notebooks files.
-| `-- *.ipynb
+| |-- *.ipynb
+| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH
|-- setup.py # To pip install your Python module (if module name specified to cookiecutter)
|-- src
| |-- my_custom_module # Your custom module
+| |-- my_nb_color.py # Imported by *.ipynb to colorize their outputs
| `-- source_dir # You can further create this subdir for SageMaker entrypoint scripts
|-- tests # Unit tests
diff --git a/cookiecutter.json b/cookiecutter.json
index 2014bb2..893409e 100644
--- a/cookiecutter.json
+++ b/cookiecutter.json
@@ -8,9 +8,5 @@
"MIT License",
"Apache-2.0 License"
],
- "python_interpreter": [
- "python3",
- "python"
- ],
"package_name": ""
}
diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py
index 4916e98..38c047e 100644
--- a/hooks/post_gen_project.py
+++ b/hooks/post_gen_project.py
@@ -50,7 +50,7 @@ def rm(s: Path) -> None:
message.append("# - review LICENSE")
if package_name != "":
message += [
- "# - review and update setup.py, then remove the exception at the end.",
+ "# - review and update setup.py, then remove the exception at the start.",
"# - consider to adopt versioneer to version your package.",
]
message += [
diff --git a/{{cookiecutter.repo_name}}/README.md b/{{cookiecutter.repo_name}}/README.md
index 92708f6..209d8f9 100644
--- a/{{cookiecutter.repo_name}}/README.md
+++ b/{{cookiecutter.repo_name}}/README.md
@@ -8,14 +8,16 @@
{{cookiecutter.repo_name}}
|-- bin # CLI scripts
|-- notebooks
-| `-- *.ipynb # Jupyter notebooks
+| |-- *.ipynb # Jupyter notebooks
+| `-- my_nb_path.py # Imported by *.ipynb to treat src/ as PYTHONPATH
{% if cookiecutter.package_name != "" -%}
|-- setup.py # To install {{cookiecutter.repo_name}} as a Python module
{% endif -%}
|-- src # Python modules developed in this project
{% if cookiecutter.package_name != "" -%}
-| `-- {{cookiecutter.repo_name}}
+| |-- {{cookiecutter.repo_name}}
{% endif -%}
+| `-- my_nb_color.py # Imported by *.ipynb to colorize their outputs
`-- tests # Unit tests
# Miscellaneous files
diff --git a/{{cookiecutter.repo_name}}/ipython_config.py b/{{cookiecutter.repo_name}}/ipython_config.py
deleted file mode 100644
index d97c322..0000000
--- a/{{cookiecutter.repo_name}}/ipython_config.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import os
-import subprocess
-from pathlib import Path
-from typing import Union
-
-####################################################################################################
-# Additional PYTHONPATH to allow notebooks to import custom modules at a few pre-defined places.
-
-
-def sys_path_append(o: Union[str, os.PathLike]) -> str:
- posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix()
- return 'sys.path.insert(0, "{}")'.format(posix_path)
-
-
-_pythonpath = [
- "import sys, os",
- sys_path_append(os.getcwd()),
-]
-
-# Add GIT_ROOT/ and a few other subdirs
-try:
- _p = subprocess.run(
- ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
- )
-
- if _p.returncode == 0:
- _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n'
- _git_root_p: Path = Path(_git_root)
- _pythonpath += [
- sys_path_append(_git_root_p), # GIT_ROOT
- sys_path_append(_git_root_p / "src"), # GIT_ROOT/src
- sys_path_append(_git_root_p / "notebooks"), # GIT_ROOT/notebooks
- ]
-except: # noqa: E722
- pass
-
-c.InteractiveShellApp.exec_lines = _pythonpath # type: ignore # noqa: F821
-####################################################################################################
diff --git a/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py
new file mode 100644
index 0000000..0571245
--- /dev/null
+++ b/{{cookiecutter.repo_name}}/notebooks/my_nb_path.py
@@ -0,0 +1,69 @@
+"""Allow notebooks to import custom modules at a few pre-defined places within this project's
+git repository.
+
+When imported, adds ``GITROOT``, ``GITROOT/src``, and ``GITROOT/notebooks`` to `sys.path`.
+
+Place this file in the same directory as your ``.ipynb`` files. If ``.ipynb`` files are organized
+into subfolders, please ensure this file is present in each subfolder. Example:
+
+.. code-block:: bash
+
+ GITROOT
+ |-- .git # Signify this is a git repository
+ |-- notebooks # Parent folder of Jupyter notebooks
+ | |-- folder-a
+ | | |-- my_nb_path.py # Importable by nb-abc.ipynb and nb-xyz.ipynb
+ | | |-- nb-abc.ipynb
+ | | `-- nb-xyz.ipynb
+ | |-- my_nb_path.py # Importable by nb-01.ipynb and nb-02.ipynb
+ | |-- nb-01.ipynb
+ | `-- nb-02.ipynb
+ `-- src
+ `-- my_custom_module
+ |-- __init__.py
+ `-- ...
+
+Usage by ``.ipynb``:
+
+ >>> # Allow this notebook to import from GITROOT, GITROOT/src, and GITROOT/notebooks.
+ >>> # This module must be imported before importing any other custom modules under GITROOT.
+ >>> # The isort directive prevents the statement from being moved around when isort is used.
+ >>> import my_nb_path # isort: skip
+ >>>
+ >>> # Test-drive importing a custom module under GITROOT/src.
+ >>> import my_custom_module
+
+Background: we used to rely on ``ipython_config.py`` in the current working directory. However,
+IPython 8.0.1+, 7.31.1+ and 5.11+ disable this behavior for security reasons, as described in the
+`IPython 8.0.1 release notes <https://ipython.readthedocs.io/en/stable/whatsnew/version8.html#ipython-8-0-1-cve-2022-21699>`_.
+
+So now, each ``.ipynb`` must explicitly modify its own `sys.path`, which is what this module offers
+as a convenience.
+"""
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Union
+
+def sys_path_append(o: Union[str, os.PathLike]) -> None:
+ posix_path: str = o.as_posix() if isinstance(o, Path) else Path(o).as_posix()
+ if posix_path not in sys.path:
+ sys.path.insert(0, posix_path)
+
+# Add GIT_ROOT/ and a few other subdirs
+_p = subprocess.run(
+ ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+)
+
+if _p.returncode == 0:
+ _git_root: str = _p.stdout[:-1].decode("utf-8") # Remove trailing '\n'
+ _git_root_p = Path(_git_root)
+
+ my_sys_paths = [
+ _git_root_p,
+ _git_root_p / "src",
+ _git_root_p / "notebooks",
+ ]
+ for sp in my_sys_paths:
+ sys_path_append(sp)
diff --git a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
index 11d7c3c..4ec4031 100644
--- a/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
+++ b/{{cookiecutter.repo_name}}/notebooks/skeleton.ipynb
@@ -13,8 +13,9 @@
"- Best viewed using Jupyter Lab.\n",
"- The title is a styled sentence rather than `h1`, to prevent it being showed and numbered in TOC.\n",
"\n",
- "
NOTE: this skeleton notebook is primarily for reading. To run it\n",
- "completely, you need to install additional dependencies imported in the cell below.
"
+ "**NOTE:** this skeleton notebook is meant for reading. To run it,\n",
+ "please install the additional dependencies imported in the cell after next, which starts with\n",
+ "the line `# Dependencies required`."
]
},
{
@@ -28,12 +29,24 @@
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
- "# Follow isort>=5 style: 'import ...' statements before 'from ... import ...'.\n",
+ "# Make sure my_nb_path is imported first (and when isort is used, it needs to be told).\n",
+ "import my_nb_path # isort: skip\n",
+ "from my_nb_color import print, rprint"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dependencies required\n",
"import ndpretty\n",
"import numpy as np\n",
"import pandas as pd\n",
"import sagemaker as sm\n",
"from IPython.display import Markdown\n",
+ "from loguru import logger\n",
"from smallmatter.ds import mask_df # See: https://github.com/aws-samples/smallmatter-package/\n",
"\n",
"# A few standard SageMaker's stanzas. Use type annotation to be verbose.\n",
@@ -145,6 +158,67 @@
"# Improved output"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Colored outputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Colored: \u001b[1m{\u001b[0m\u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m, \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\u001b[1m}\u001b[0m\n",
+ "Colored and wrapped:\n",
+ "\u001b[1m{\u001b[0m\n",
+ " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n",
+ "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\u001b[0m\n",
+ "\u001b[32mAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n",
+ " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n",
+ "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\u001b[0m\n",
+ "\u001b[32mBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n",
+ "\u001b[1m}\u001b[0m\n",
+ "\n",
+ "\u001b[1m{\u001b[0m\n",
+ " \u001b[32m'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'\u001b[0m,\n",
+ " \u001b[32m'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB'\u001b[0m\n",
+ "\u001b[1m}\u001b[0m\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m2022-01-22 17:23:03.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[34m\u001b[1mHello World!\u001b[0m\n",
+ "\u001b[32m2022-01-22 17:23:03.530\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[1mHello World!\u001b[0m\n",
+ "\u001b[32m2022-01-22 17:23:03.531\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[32m\u001b[1mHello World!\u001b[0m\n",
+ "\u001b[32m2022-01-22 17:23:03.532\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m7\u001b[0m - \u001b[31m\u001b[1mHello World!\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "d = {\"A\" * 200, \"B\" * 200}\n",
+ "print(\"Colored:\", d)\n",
+ "rprint(\"Colored and wrapped:\", d)\n",
+ "display(d)\n",
+ "\n",
+ "for f in (logger.debug, logger.info, logger.success, logger.error):\n",
+ " f(\"Hello World!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Dataframes"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -153,8 +227,8 @@
{
"data": {
"text/markdown": [
- "## Plain dataframe\n",
- "**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\""
+ "### Plain dataframe\n",
+ "**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\""
],
"text/plain": [
""
@@ -221,7 +295,7 @@
{
"data": {
"text/markdown": [
- "## Masked dataframe\n",
+ "### Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
@@ -310,17 +384,19 @@
")\n",
"df_b = pd.DataFrame(\n",
" {\n",
- " \"userid\": [1000, 2000, 3000],\n",
+ " \"userid\": [1000, 2000, 3000], # Illustration only. Usually read from somewhere.\n",
" \"pca_a\": [0.1, 0.2, 0.3],\n",
" \"pca_b\": [-0.3, 0.01, 0.7],\n",
" }\n",
")\n",
"\n",
"display(\n",
- " Markdown('## Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.1. Plain dataframe*\"'),\n",
+ " Markdown(\n",
+ " '### Plain dataframe\\n**NOTE:** this also appears in TOC as \"*2.2.1. Plain dataframe*\"'\n",
+ " ),\n",
" df_a,\n",
" Markdown(\n",
- " \"\"\"## Masked dataframe\n",
+ " \"\"\"### Masked dataframe\n",
"Sometime, we would like to version the output of this cell into the git repo, to help readers to\n",
"quickly see the shape of a dataframe.\n",
"\n",
@@ -400,9 +476,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Environment (virtualenv_p39x)",
+ "display_name": "Environment (virtualenv_ds-p310)",
"language": "python",
- "name": "virtualenv_p39x"
+ "name": "virtualenv_ds-p310"
},
"language_info": {
"codemirror_mode": {
@@ -414,7 +490,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.2"
+ "version": "3.10.2"
},
"toc-autonumbering": true,
"toc-showcode": false,
diff --git a/{{cookiecutter.repo_name}}/setup.py b/{{cookiecutter.repo_name}}/setup.py
index 85a7ad6..46f5f88 100644
--- a/{{cookiecutter.repo_name}}/setup.py
+++ b/{{cookiecutter.repo_name}}/setup.py
@@ -1,3 +1,8 @@
+raise ValueError(
+ "Baseline setup.py from cookiecutter aws-samples/python-data-science-template. "
+ "Please review and modify accordingly, then remove this exception"
+)
+
import os
from typing import List
@@ -55,8 +60,3 @@ def read(fname) -> str:
python_requires=">=3.6.0",
install_requires=required_packages,
)
-
-raise ValueError(
- "Baseline setup.py from cookiecutter verdimrc/py-ds-template. "
- "Please review and modify accordingly, then remove this exception"
-)
diff --git a/{{cookiecutter.repo_name}}/src/my_nb_color.py b/{{cookiecutter.repo_name}}/src/my_nb_color.py
new file mode 100644
index 0000000..f13a467
--- /dev/null
+++ b/{{cookiecutter.repo_name}}/src/my_nb_color.py
@@ -0,0 +1,44 @@
+"""Convenience module to set up colored prints and logs in a Jupyter notebook.
+
+Dependencies: `loguru`, `rich`.
+
+Basic usage by an ``.ipynb``:
+
+ >>> # Colorize notebook outputs
+ >>> from my_nb_color import print, rprint, oprint
+ >>>
+ >>> # Test-drive different behavior of print functionalities
+ >>> d = {"A" * 200, "B" * 200}
+ >>> print("Colored:", d)
+ >>> rprint("Colored and wrapped:", d)
+ >>> oprint("Plain (i.e., Python's original):", d)
+ >>> display(d)
+ >>>
+ >>> # Test-drive loguru
+ >>> from loguru import logger
+ >>> for f in (logger.debug, logger.info, logger.success, logger.error):
+ ...     f("Hello World!")
+"""
+import sys
+
+
+# Try to setup rich.
+try:
+ import rich
+except ModuleNotFoundError:
+ print = rprint = oprint = print
+else:
+ oprint = print # In-case plain old behavior is needed
+ rich.reconfigure(force_terminal=True, force_jupyter=False)
+ rich.pretty.install()
+ print = rich.get_console().out
+ rprint = rich.get_console().print
+
+
+# Try to setup loguru.
+try:
+ from loguru import logger
+except ModuleNotFoundError:
+ pass
+else:
+ logger.configure(handlers=[dict(sink=sys.stderr, colorize=True)])
pFad - Phonifier reborn
Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy