diff --git a/docs/api/python/store.rst b/docs/api/python/store.rst index bd2c2eb4892..305657c65f1 100644 --- a/docs/api/python/store.rst +++ b/docs/api/python/store.rst @@ -1,17 +1,18 @@ +==================== Object Store support ==================== -Vortex arrays support reading and writing to object storage systems such as, S3, Google Cloud Storage, and -Azure Blob Storage. - -.. autosummary:: - :nosignatures: - -.. raw:: html +Vortex arrays support reading and writing to many object storage systems: -
+.. toctree:: + :maxdepth: 1 -.. automodule:: vortex.store - :members: - :imported-members: + store/aws + store/gcs + store/azure + store/http + store/local + store/memory + store/config +.. autofunction:: vortex.store.from_url diff --git a/docs/api/python/store/aws.rst b/docs/api/python/store/aws.rst new file mode 100644 index 00000000000..f29fc1d72cb --- /dev/null +++ b/docs/api/python/store/aws.rst @@ -0,0 +1,20 @@ +==================== +S3 and S3-compatible +==================== + + +.. autosummary:: + vortex.store.S3Store + vortex.store.S3Config + vortex.store.S3Credential + vortex.store.S3CredentialProvider + +.. autoclass:: vortex.store.S3Store + :members: +.. autoclass:: vortex.store.S3Config + :members: +.. autoclass:: vortex.store.S3Credential + :members: +.. autoclass:: vortex.store.S3CredentialProvider + :members: + diff --git a/docs/api/python/store/azure.rst b/docs/api/python/store/azure.rst new file mode 100644 index 00000000000..d06a42dcaf7 --- /dev/null +++ b/docs/api/python/store/azure.rst @@ -0,0 +1,29 @@ +================== +Azure Blob Storage +================== + +.. autosummary:: + vortex.store.AzureStore + vortex.store.AzureConfig + vortex.store.AzureAccessKey + vortex.store.AzureSASToken + vortex.store.AzureBearerToken + vortex.store.AzureCredential + vortex.store.AzureCredentialProvider + + +.. autoclass:: vortex.store.AzureStore + :members: +.. autoclass:: vortex.store.AzureConfig + :members: +.. autoclass:: vortex.store.AzureAccessKey + :members: +.. autoclass:: vortex.store.AzureSASToken + :members: +.. autoclass:: vortex.store.AzureBearerToken + :members: +.. autoclass:: vortex.store.AzureCredential + :members: +.. 
autoclass:: vortex.store.AzureCredentialProvider + :members: + diff --git a/docs/api/python/store/config.rst b/docs/api/python/store/config.rst new file mode 100644 index 00000000000..90a3bd48c1a --- /dev/null +++ b/docs/api/python/store/config.rst @@ -0,0 +1,16 @@ +==================== +Common Configuration +==================== + +.. autosummary:: + vortex.store.ClientConfig + vortex.store.RetryConfig + vortex.store.BackoffConfig + + +.. autoclass:: vortex.store.ClientConfig + :members: +.. autoclass:: vortex.store.RetryConfig + :members: +.. autoclass:: vortex.store.BackoffConfig + :members: diff --git a/docs/api/python/store/gcs.rst b/docs/api/python/store/gcs.rst new file mode 100644 index 00000000000..a4a3300b4b5 --- /dev/null +++ b/docs/api/python/store/gcs.rst @@ -0,0 +1,18 @@ +==================== +Google Cloud Storage +==================== + +.. autosummary:: + vortex.store.GCSStore + vortex.store.GCSConfig + vortex.store.GCSCredential + vortex.store.GCSCredentialProvider + +.. autoclass:: vortex.store.GCSStore + :members: +.. autoclass:: vortex.store.GCSConfig + :members: +.. autoclass:: vortex.store.GCSCredential + :members: +.. autoclass:: vortex.store.GCSCredentialProvider + :members: diff --git a/docs/api/python/store/http.rst b/docs/api/python/store/http.rst new file mode 100644 index 00000000000..49b6f71de3a --- /dev/null +++ b/docs/api/python/store/http.rst @@ -0,0 +1,6 @@ +==== +HTTP +==== + +.. autoclass:: vortex.store.HTTPStore + :members: diff --git a/docs/api/python/store/local.rst b/docs/api/python/store/local.rst new file mode 100644 index 00000000000..a92b5211945 --- /dev/null +++ b/docs/api/python/store/local.rst @@ -0,0 +1,6 @@ +===== +Local +===== + +.. autoclass:: vortex.store.LocalStore + :members: diff --git a/docs/api/python/store/memory.rst b/docs/api/python/store/memory.rst new file mode 100644 index 00000000000..beab652f770 --- /dev/null +++ b/docs/api/python/store/memory.rst @@ -0,0 +1,6 @@ +====== +Memory +====== + +.. 
autoclass:: vortex.store.MemoryStore + :members: diff --git a/docs/pyproject.toml b/docs/pyproject.toml index 63378cbf06f..809bd55d67f 100644 --- a/docs/pyproject.toml +++ b/docs/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ "sphinx-copybutton>=0.5.2", "sphinx-design>=0.6.0", "sphinx-inline-tabs>=2023.4.21", - "sphinx>=8.0.2", + "sphinx>=9.0.0", "sphinxcontrib-bibtex>=2.6.3", "sphinxcontrib-mermaid>=1.0.0", "sphinxext-opengraph>=0.9.1", diff --git a/uv.lock b/uv.lock index eae89dc20d9..f4bc78120f4 100644 --- a/uv.lock +++ b/uv.lock @@ -131,7 +131,8 @@ name = "breathe" version = "4.36.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/01/56/99bf7d0799d95ad485d95596dc01c2a5b3dda58ebf50a94f6f73b33bacdf/breathe-4.36.0.tar.gz", hash = "sha256:14860b73118ac140b7a3f55446890c777d1b67149cb024279fe3710dad7f535c", size = 154842, upload-time = "2025-02-22T18:36:03.36Z" } wheels = [ @@ -241,7 +242,8 @@ dependencies = [ { name = "myst-parser" }, { name = "pydata-sphinx-theme" }, { name = "setuptools" }, - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, { name = "sphinx-design" }, @@ -261,7 +263,7 @@ requires-dist = [ { name = "myst-parser", specifier = ">=4.0.0" }, { name = "pydata-sphinx-theme", specifier = ">=0.16.0" }, { name = "setuptools", specifier = ">=75.8.0" }, - { name = "sphinx", specifier = ">=8.0.2" 
}, + { name = "sphinx", specifier = ">=9.0.0" }, { name = "sphinx-autobuild", specifier = ">=2024.10.3" }, { name = "sphinx-copybutton", specifier = ">=0.5.2" }, { name = "sphinx-design", specifier = ">=0.6.0" }, @@ -349,7 +351,8 @@ name = "hawkmoth" version = "0.21.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/11/19/bc186706b15923d02c75d5113443190ffc2be44d60903b8d5ff2813a7958/hawkmoth-0.21.0.tar.gz", hash = "sha256:da338218411fdc67b52254f02dbdec162ed4a7db588de7d3a1e5314864fce5cd", size = 25958, upload-time = "2025-05-10T12:35:22.053Z" } wheels = [ @@ -496,14 +499,14 @@ wheels = [ [[package]] name = "markdown-it-py" -version = "3.0.0" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mdurl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = 
"2023-06-03T06:41:11.019Z" }, + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, ] [[package]] @@ -689,7 +692,7 @@ wheels = [ [[package]] name = "myst-parser" -version = "4.0.1" +version = "5.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "docutils" }, @@ -697,11 +700,12 @@ dependencies = [ { name = "markdown-it-py" }, { name = "mdit-py-plugins" }, { name = "pyyaml" }, - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/66/a5/9626ba4f73555b3735ad86247a8077d4603aa8628537687c839ab08bfe44/myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4", size = 93985, upload-time = "2025-02-12T10:53:03.833Z" } +sdist = { url = "https://files.pythonhosted.org/packages/33/fa/7b45eef11b7971f0beb29d27b7bfe0d747d063aa29e170d9edd004733c8a/myst_parser-5.0.0.tar.gz", hash = "sha256:f6f231452c56e8baa662cc352c548158f6a16fcbd6e3800fc594978002b94f3a", size = 98535, upload-time = "2026-01-15T09:08:18.036Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579, upload-time = "2025-02-12T10:53:02.078Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ac/686789b9145413f1a61878c407210e41bfdb097976864e0913078b24098c/myst_parser-5.0.0-py3-none-any.whl", hash = 
"sha256:ab31e516024918296e169139072b81592336f2fef55b8986aa31c9f04b5f7211", size = 84533, upload-time = "2026-01-15T09:08:16.788Z" }, ] [[package]] @@ -1118,7 +1122,8 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "docutils" }, { name = "pygments" }, - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/00/20/bb50f9de3a6de69e6abd6b087b52fa2418a0418b19597601605f855ad044/pydata_sphinx_theme-0.16.1.tar.gz", hash = "sha256:a08b7f0b7f70387219dc659bff0893a7554d5eb39b59d3b8ef37b8401b7642d7", size = 2412693, upload-time = "2024-12-17T10:53:39.537Z" } @@ -1324,12 +1329,12 @@ wheels = [ ] [[package]] -name = "roman-numerals-py" -version = "3.1.0" +name = "roman-numerals" +version = "4.1.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/30/76/48fd56d17c5bdbdf65609abbc67288728a98ed4c02919428d4f52d23b24b/roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d", size = 9017, upload-time = "2025-02-22T07:34:54.333Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/41dc953bbeb056c17d5f7a519f50fdf010bd0553be2d630bc69d1e022703/roman_numerals-4.1.0.tar.gz", hash = "sha256:1af8b147eb1405d5839e78aeb93131690495fe9da5c91856cb33ad55a7f1e5b2", size = 9077, upload-time = "2025-12-17T18:25:34.381Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/53/97/d2cbbaa10c9b826af0e10fdf836e1bf344d9f0abb873ebc34d1f49642d3f/roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c", size = 7742, upload-time = "2025-02-22T07:34:52.422Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/54/6f679c435d28e0a568d8e8a7c0a93a09010818634c3c3907fc98d8983770/roman_numerals-4.1.0-py3-none-any.whl", hash = "sha256:647ba99caddc2cc1e55a51e4360689115551bf4476d90e8162cf8c345fe233c7", size = 7676, upload-time = "2025-12-17T18:25:33.098Z" }, ] [[package]] @@ -1522,30 +1527,64 @@ wheels = [ [[package]] name = "sphinx" -version = "8.2.3" +version = "9.0.4" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.12'", +] dependencies = [ - { name = "alabaster" }, - { name = "babel" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "docutils" }, - { name = "imagesize" }, - { name = "jinja2" }, - { name = "packaging" }, - { name = "pygments" }, - { name = "requests" }, - { name = "roman-numerals-py" }, - { name = "snowballstemmer" }, - { name = "sphinxcontrib-applehelp" }, - { name = "sphinxcontrib-devhelp" }, - { name = "sphinxcontrib-htmlhelp" }, - { name = "sphinxcontrib-jsmath" }, - { name = "sphinxcontrib-qthelp" }, - { name = "sphinxcontrib-serializinghtml" }, + { name = "alabaster", marker = "python_full_version < '3.12'" }, + { name = "babel", marker = "python_full_version < '3.12'" }, + { name = "colorama", marker = "python_full_version < '3.12' and sys_platform == 'win32'" }, + { name = "docutils", marker = "python_full_version < '3.12'" }, + { name = "imagesize", marker = "python_full_version < '3.12'" }, + { name = "jinja2", marker = "python_full_version < '3.12'" }, + { name = "packaging", marker = "python_full_version < '3.12'" }, + { name = "pygments", marker = "python_full_version < '3.12'" }, + { name = "requests", marker = "python_full_version < '3.12'" }, + { name = "roman-numerals", marker = "python_full_version < '3.12'" }, + { name = "snowballstemmer", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-devhelp", marker = 
"python_full_version < '3.12'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version < '3.12'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/50/a8c6ccc36d5eacdfd7913ddccd15a9cee03ecafc5ee2bc40e1f168d85022/sphinx-9.0.4.tar.gz", hash = "sha256:594ef59d042972abbc581d8baa577404abe4e6c3b04ef61bd7fc2acbd51f3fa3", size = 8710502, upload-time = "2025-12-04T07:45:27.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/3f/4bbd76424c393caead2e1eb89777f575dee5c8653e2d4b6afd7a564f5974/sphinx-9.0.4-py3-none-any.whl", hash = "sha256:5bebc595a5e943ea248b99c13814c1c5e10b3ece718976824ffa7959ff95fffb", size = 3917713, upload-time = "2025-12-04T07:45:24.944Z" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/ad/4360e50ed56cb483667b8e6dadf2d3fda62359593faabbe749a27c4eaca6/sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348", size = 8321876, upload-time = "2025-03-02T22:31:59.658Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/31/53/136e9eca6e0b9dc0e1962e2c908fbea2e5ac000c2a2fbd9a35797958c48b/sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3", size = 3589741, upload-time = "2025-03-02T22:31:56.836Z" }, + +[[package]] +name = "sphinx" +version = "9.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.12'", +] +dependencies = [ + { name = "alabaster", marker = "python_full_version >= '3.12'" }, + { name = "babel", marker = "python_full_version >= '3.12'" }, + { name = "colorama", marker = "python_full_version >= '3.12' and sys_platform == 'win32'" }, + { name = "docutils", marker = 
"python_full_version >= '3.12'" }, + { name = "imagesize", marker = "python_full_version >= '3.12'" }, + { name = "jinja2", marker = "python_full_version >= '3.12'" }, + { name = "packaging", marker = "python_full_version >= '3.12'" }, + { name = "pygments", marker = "python_full_version >= '3.12'" }, + { name = "requests", marker = "python_full_version >= '3.12'" }, + { name = "roman-numerals", marker = "python_full_version >= '3.12'" }, + { name = "snowballstemmer", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-applehelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-devhelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-htmlhelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-jsmath", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-qthelp", marker = "python_full_version >= '3.12'" }, + { name = "sphinxcontrib-serializinghtml", marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/bd/f08eb0f4eed5c83f1ba2a3bd18f7745a2b1525fad70660a1c00224ec468a/sphinx-9.1.0.tar.gz", hash = "sha256:7741722357dd75f8190766926071fed3bdc211c74dd2d7d4df5404da95930ddb", size = 8718324, upload-time = "2025-12-31T15:09:27.646Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/f7/b1884cb3188ab181fc81fa00c266699dab600f927a964df02ec3d5d1916a/sphinx-9.1.0-py3-none-any.whl", hash = "sha256:c84fdd4e782504495fe4f2c0b3413d6c2bf388589bb352d439b2a3bb99991978", size = 3921742, upload-time = "2025-12-31T15:09:25.561Z" }, ] [[package]] @@ -1554,7 +1593,8 @@ version = "2025.8.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama" }, - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, 
marker = "python_full_version >= '3.12'" }, { name = "starlette" }, { name = "uvicorn" }, { name = "watchfiles" }, @@ -1570,7 +1610,8 @@ name = "sphinx-copybutton" version = "0.5.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fc/2b/a964715e7f5295f77509e59309959f4125122d648f86b4fe7d70ca1d882c/sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd", size = 23039, upload-time = "2023-04-14T08:10:22.998Z" } wheels = [ @@ -1582,7 +1623,8 @@ name = "sphinx-design" version = "0.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/13/7b/804f311da4663a4aecc6cf7abd83443f3d4ded970826d0c958edc77d4527/sphinx_design-0.7.0.tar.gz", hash = "sha256:d2a3f5b19c24b916adb52f97c5f00efab4009ca337812001109084a740ec9b7a", size = 2203582, upload-time = "2026-01-19T13:12:53.297Z" } wheels = [ @@ -1594,7 +1636,8 @@ name = "sphinx-inline-tabs" version = "2023.4.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= 
'3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/48/f5/f8a2be63ed7be9f91a4c2bea0e25bcb56aa4c5cc37ec4d8ead8065f926b1/sphinx_inline_tabs-2023.4.21.tar.gz", hash = "sha256:5df2f13f602c158f3f5f6c509e008aeada199a8c76d97ba3aa2822206683bebc", size = 42664, upload-time = "2023-04-21T20:25:30.578Z" } wheels = [ @@ -1618,7 +1661,8 @@ dependencies = [ { name = "docutils" }, { name = "pybtex" }, { name = "pybtex-docutils" }, - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/de/83/1488c9879f2fa3c2cbd6f666c7a3a42a1fa9e08462bec73281fa6c092cba/sphinxcontrib_bibtex-2.6.5.tar.gz", hash = "sha256:9b3224dd6fece9268ebd8c905dc0a83ff2f6c54148a9235fe70e9d1e9ff149c0", size = 118462, upload-time = "2025-06-27T10:40:14.061Z" } wheels = [ @@ -1659,7 +1703,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jinja2" }, { name = "pyyaml" }, - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/96/a5/65a5c439cc14ba80483b9891e9350f11efb80cd3bdccb222f0c738068c78/sphinxcontrib_mermaid-2.0.0.tar.gz", hash = "sha256:cf4f7d453d001132eaba5d1fdf53d42049f02e913213cf8337427483bfca26f4", size = 18194, upload-time = "2026-01-13T17:13:42.563Z" } wheels = [ @@ -1689,7 +1734,8 @@ name = "sphinxext-opengraph" version = "0.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "sphinx" }, + { name = "sphinx", version = "9.0.4", source = { registry 
= "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/f6/c0/eb6838e3bae624ce6c8b90b245d17e84252863150e95efdb88f92c8aa3fb/sphinxext_opengraph-0.13.0.tar.gz", hash = "sha256:103335d08567ad8468faf1425f575e3b698e9621f9323949a6c8b96d9793e80b", size = 1026875, upload-time = "2025-08-29T12:20:31.066Z" } wheels = [ @@ -1875,7 +1921,7 @@ requires-dist = [ { name = "polars", marker = "extra == 'polars'", specifier = ">=1.31.0" }, { name = "pyarrow", specifier = ">=17.0.0" }, { name = "ray", marker = "extra == 'ray'", specifier = ">=2.48" }, - { name = "substrait", specifier = ">=0.23.0" }, + { name = "substrait", specifier = ">=0.23.0,<0.85.0" }, { name = "typing-extensions", specifier = ">=4.5.0" }, ] provides-extras = ["duckdb", "numpy", "pandas", "polars", "ray"] diff --git a/vortex-python/python/vortex/_lib/store/__init__.pyi b/vortex-python/python/vortex/_lib/store/__init__.pyi index 56b92353aeb..b394b118b80 100644 --- a/vortex-python/python/vortex/_lib/store/__init__.pyi +++ b/vortex-python/python/vortex/_lib/store/__init__.pyi @@ -130,13 +130,13 @@ class LocalStore: ``` """ - def __init__( + def __new__( self, prefix: str | Path | None = None, *, automatic_cleanup: bool = False, mkdir: bool = False, - ) -> None: + ) -> Self: """Create a new LocalStore. Args: diff --git a/vortex-python/python/vortex/_lib/store/_aws.pyi b/vortex-python/python/vortex/_lib/store/_aws.pyi index 8bed273bdca..91833151158 100644 --- a/vortex-python/python/vortex/_lib/store/_aws.pyi +++ b/vortex-python/python/vortex/_lib/store/_aws.pyi @@ -457,8 +457,8 @@ class S3Store: set in the environment. 
""" - def __init__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "bucket" - self, + def __new__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "bucket" + cls, bucket: str | None = None, *, prefix: str | None = None, @@ -467,7 +467,7 @@ class S3Store: retry_config: RetryConfig | None = None, credential_provider: S3CredentialProvider | None = None, **kwargs: Unpack[S3Config], # type: ignore # noqa: PGH003 (bucket key overlaps with positional arg) - ) -> None: + ) -> Self: """Create a new S3Store. Args: diff --git a/vortex-python/python/vortex/_lib/store/_azure.pyi b/vortex-python/python/vortex/_lib/store/_azure.pyi index 3c1d99af221..e59aa40c902 100644 --- a/vortex-python/python/vortex/_lib/store/_azure.pyi +++ b/vortex-python/python/vortex/_lib/store/_azure.pyi @@ -315,8 +315,8 @@ class AzureStore: [`AzureConfig`][vortex.store.AzureConfig] for valid environment variables. """ - def __init__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "container_name" - self, + def __new__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "container_name" + cls, container_name: str | None = None, *, prefix: str | None = None, @@ -325,7 +325,7 @@ class AzureStore: retry_config: RetryConfig | None = None, credential_provider: AzureCredentialProvider | None = None, **kwargs: Unpack[AzureConfig], # type: ignore # noqa: PGH003 (container_name key overlaps with positional arg) - ) -> None: + ) -> Self: """Construct a new AzureStore. Args: diff --git a/vortex-python/python/vortex/_lib/store/_gcs.pyi b/vortex-python/python/vortex/_lib/store/_gcs.pyi index f7f27008518..a988374593c 100644 --- a/vortex-python/python/vortex/_lib/store/_gcs.pyi +++ b/vortex-python/python/vortex/_lib/store/_gcs.pyi @@ -134,8 +134,8 @@ class GCSStore: [here](https://cloud.google.com/docs/authentication/application-default-credentials). 
""" - def __init__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "bucket" - self, + def __new__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "bucket" + cls, bucket: str | None = None, *, prefix: str | None = None, @@ -144,7 +144,7 @@ class GCSStore: retry_config: RetryConfig | None = None, credential_provider: GCSCredentialProvider | None = None, **kwargs: Unpack[GCSConfig], # type: ignore # noqa: PGH003 (bucket key overlaps with positional arg) - ) -> None: + ) -> Self: """Construct a new GCSStore. Args: diff --git a/vortex-python/python/vortex/_lib/store/_http.pyi b/vortex-python/python/vortex/_lib/store/_http.pyi index 549470fd4c5..56d9e464c29 100644 --- a/vortex-python/python/vortex/_lib/store/_http.pyi +++ b/vortex-python/python/vortex/_lib/store/_http.pyi @@ -9,13 +9,13 @@ from ._retry import RetryConfig class HTTPStore: """Configure a connection to a generic HTTP server.""" - def __init__( + def __new__( self, url: str, *, client_options: ClientConfig | None = None, retry_config: RetryConfig | None = None, - ) -> None: + ) -> Self: """Construct a new HTTPStore from a URL. Any path on the URL will be assigned as the `prefix` for the store. 
So if you diff --git a/vortex-python/python/vortex/store.py b/vortex-python/python/vortex/store.py deleted file mode 100644 index 3819ef86042..00000000000 --- a/vortex-python/python/vortex/store.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright the Vortex contributors - -from typing import TYPE_CHECKING - -from ._lib.store import ( # pyright: ignore[reportMissingModuleSource] - AzureStore, - GCSStore, - HTTPStore, - LocalStore, - MemoryStore, - S3Store, - from_url, -) - -if TYPE_CHECKING: - from ._lib.store import ( # pyright: ignore[reportMissingModuleSource] - AzureAccessKey, - AzureBearerToken, - AzureConfig, - AzureCredential, - AzureCredentialProvider, - AzureSASToken, - BackoffConfig, - ClientConfig, - GCSConfig, - GCSCredential, - GCSCredentialProvider, - ObjectStore, - RetryConfig, - S3Config, - S3Credential, - S3CredentialProvider, - ) - -__all__ = [ - # Azure - "AzureAccessKey", - "AzureBearerToken", - "AzureConfig", - "AzureCredential", - "AzureCredentialProvider", - "AzureSASToken", - "AzureStore", - # Client - "BackoffConfig", - "ClientConfig", - "RetryConfig", - # GCS - "GCSConfig", - "GCSCredential", - "GCSCredentialProvider", - "GCSStore", - # HTTP - "HTTPStore", - # Local - "LocalStore", - "MemoryStore", - # S3 - "S3Config", - "S3Credential", - "S3CredentialProvider", - "S3Store", - # Utility - "from_url", - "ObjectStore", -] diff --git a/vortex-python/python/vortex/store/LICENSE b/vortex-python/python/vortex/store/LICENSE new file mode 100644 index 00000000000..7bca320a280 --- /dev/null +++ b/vortex-python/python/vortex/store/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Development Seed + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vortex-python/python/vortex/store/__init__.py b/vortex-python/python/vortex/store/__init__.py new file mode 100644 index 00000000000..043f0bf9ab6 --- /dev/null +++ b/vortex-python/python/vortex/store/__init__.py @@ -0,0 +1,160 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +from collections.abc import Callable +from typing import TypeAlias, Unpack, overload + +from .._lib import store as _store # pyright: ignore[reportMissingModuleSource] +from ._aws import S3Config, S3Credential, S3CredentialProvider, S3Store +from ._azure import ( + AzureAccessKey, + AzureBearerToken, + AzureConfig, + AzureCredential, + AzureCredentialProvider, + AzureSASToken, + AzureStore, +) +from ._client import ClientConfig +from ._gcs import GCSConfig, GCSCredential, GCSCredentialProvider, GCSStore +from ._http import HTTPStore +from ._local import LocalStore +from ._memory import MemoryStore +from ._retry import BackoffConfig, RetryConfig + +ObjectStore: TypeAlias = AzureStore | GCSStore | HTTPStore | S3Store | LocalStore | MemoryStore +"""All supported ObjectStore implementations.""" + + +@overload +def from_url( + url: str, + *, + config: S3Config | None = None, + 
client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: S3CredentialProvider | None = None, + **kwargs: Unpack[S3Config], +) -> ObjectStore: ... +@overload +def from_url( + url: str, + *, + config: GCSConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: GCSCredentialProvider | None = None, + **kwargs: Unpack[GCSConfig], +) -> ObjectStore: ... +@overload +def from_url( + url: str, + *, + config: AzureConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: AzureCredentialProvider | None = None, + **kwargs: Unpack[AzureConfig], +) -> ObjectStore: ... +@overload +def from_url( + url: str, + *, + config: None = None, + client_options: None = None, + retry_config: None = None, + automatic_cleanup: bool = False, + mkdir: bool = False, +) -> ObjectStore: ... +def from_url( # type: ignore[misc] # docstring in pyi file + url: str, + *, + config: S3Config | GCSConfig | AzureConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: Callable[..., object] | None = None, + **kwargs: object, +) -> ObjectStore: + """Easy construction of store by URL, identifying the relevant store. + + This will defer to a store-specific ``from_url`` constructor based on the provided + ``url``. E.g. passing ``"s3://bucket/path"`` will defer to + :meth:`S3Store.from_url <vortex.store.S3Store.from_url>`. 
+ + Supported formats: + + - ``file:///path/to/my/file`` -> :class:`~vortex.store.LocalStore` + - ``memory:///`` -> :class:`~vortex.store.MemoryStore` + - ``s3://bucket/path`` -> :class:`~vortex.store.S3Store` (also supports ``s3a``) + - ``gs://bucket/path`` -> :class:`~vortex.store.GCSStore` + - ``az://account/container/path`` -> :class:`~vortex.store.AzureStore` (also + supports ``adl``, ``azure``, ``abfs``, ``abfss``) + - ``http://mydomain/path`` -> :class:`~vortex.store.HTTPStore` + - ``https://mydomain/path`` -> :class:`~vortex.store.HTTPStore` + + There are also special cases for AWS and Azure for ``https://{host?}/path`` paths: + + - ``dfs.core.windows.net``, ``blob.core.windows.net``, ``dfs.fabric.microsoft.com``, + ``blob.fabric.microsoft.com`` -> :class:`~vortex.store.AzureStore` + - ``amazonaws.com`` -> :class:`~vortex.store.S3Store` + - ``r2.cloudflarestorage.com`` -> :class:`~vortex.store.S3Store` + + .. note:: + + For best static typing, use the constructors on individual store classes + directly. + + Args: + url: well-known storage URL. + + Keyword Args: + config: per-store Configuration. Values in this config will override values + inferred from the url. Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. + credential_provider: A callback to provide custom credentials to the underlying store classes. + kwargs: per-store configuration passed down to store-specific builders. 
+ + """ + return _store.from_url( # pyright: ignore[reportCallIssue, reportUnknownVariableType] + url, + config=config, # pyright: ignore[reportArgumentType] + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, # pyright: ignore[reportArgumentType] + **kwargs, # pyright: ignore[reportArgumentType] + ) + + +__all__ = [ + # Azure + "AzureAccessKey", + "AzureBearerToken", + "AzureConfig", + "AzureCredential", + "AzureCredentialProvider", + "AzureSASToken", + "AzureStore", + # Client + "BackoffConfig", + "ClientConfig", + "RetryConfig", + # GCS + "GCSConfig", + "GCSCredential", + "GCSCredentialProvider", + "GCSStore", + # HTTP + "HTTPStore", + # Local + "LocalStore", + "MemoryStore", + # S3 + "S3Config", + "S3Credential", + "S3CredentialProvider", + "S3Store", + # Utility + "from_url", + "ObjectStore", +] diff --git a/vortex-python/python/vortex/store/_aws.py b/vortex-python/python/vortex/store/_aws.py new file mode 100644 index 00000000000..3f3079e9902 --- /dev/null +++ b/vortex-python/python/vortex/store/_aws.py @@ -0,0 +1,562 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +from collections.abc import Coroutine +from datetime import datetime +from typing import Any, Literal, NotRequired, Protocol, Self, TypeAlias, TypedDict, Unpack + +from typing_extensions import override + +from .._lib import store as _store # pyright: ignore[reportMissingModuleSource] +from ._client import ClientConfig +from ._retry import RetryConfig + +S3Regions: TypeAlias = Literal[ + "af-south-1", + "ap-east-1", + "ap-northeast-1", + "ap-northeast-2", + "ap-northeast-3", + "ap-south-1", + "ap-south-2", + "ap-southeast-1", + "ap-southeast-2", + "ap-southeast-3", + "ap-southeast-4", + "ap-southeast-5", + "ap-southeast-7", + "ca-central-1", + "ca-west-1", + "eu-central-1", + "eu-central-2", + "eu-north-1", + "eu-south-1", + "eu-south-2", + "eu-west-1", + "eu-west-2", + "eu-west-3", + 
"il-central-1", + "me-central-1", + "me-south-1", + "mx-central-1", + "sa-east-1", + "us-east-1", + "us-east-2", + "us-gov-east-1", + "us-gov-west-1", + "us-west-1", + "us-west-2", +] +"""AWS regions.""" + +S3ChecksumAlgorithm: TypeAlias = Literal["SHA256"] +"""S3 Checksum algorithms + +From https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html#using-additional-checksums +""" + +S3EncryptionAlgorithm: TypeAlias = Literal[ + "AES256", + "aws:kms", + "aws:kms:dsse", + "sse-c", +] + + +class S3Config(TypedDict, total=False): + """Configuration parameters for S3Store.""" + + access_key_id: str + """AWS Access Key. + + **Environment variable**: ``AWS_ACCESS_KEY_ID``. + """ + bucket: str + """Bucket name (required). + + **Environment variables**: + + - ``AWS_BUCKET`` + - ``AWS_BUCKET_NAME`` + """ + checksum_algorithm: S3ChecksumAlgorithm | str + """ + Sets the `checksum algorithm `_ + which has to be used for object integrity check during upload. + + **Environment variable**: ``AWS_CHECKSUM_ALGORITHM``. + """ + conditional_put: str + """Configure how to provide conditional put support + + Supported values: + + - ``"etag"`` (default): Supported for S3-compatible stores that support conditional + put using the standard `HTTP precondition `_ + headers ``If-Match`` and ``If-None-Match``. + + - ``"dynamo:"`` or ``"dynamo::"``: The name of a DynamoDB table to + use for coordination. + + This will use the same region, credentials and endpoint as configured for S3. + + **Environment variable**: ``AWS_CONDITIONAL_PUT``. + """ + container_credentials_relative_uri: str + """Set the container credentials relative URI + + + + **Environment variable**: ``AWS_CONTAINER_CREDENTIALS_RELATIVE_URI``. + """ + copy_if_not_exists: Literal["multipart"] | str + """Configure how to provide "copy if not exists". 
+ + Supported values: + + - ``"multipart"``: + + Native Amazon S3 supports copy if not exists through a multipart upload + where the upload copies an existing object and is completed only if the + new object does not already exist. + + .. warning:: + + When using this mode, ``copy_if_not_exists`` does not copy tags + or attributes from the source object. + + .. warning:: + + When using this mode, ``copy_if_not_exists`` makes only a best + effort attempt to clean up the multipart upload if the copy operation + fails. Consider using a lifecycle rule to automatically clean up + abandoned multipart uploads. + + - ``"header::"``: + + Some S3-compatible stores, such as Cloudflare R2, support copy if not exists + semantics through custom headers. + + If set, ``copy_if_not_exists`` will perform a normal copy operation with the + provided header pair, and expect the store to fail with `412 Precondition + Failed` if the destination file already exists. + + For example ``header: cf-copy-destination-if-none-match: *``, would set + the header ``cf-copy-destination-if-none-match`` to ``*``. + + - ``"header-with-status:::"``: + + The same as the header variant above but allows custom status code checking, for + object stores that return values other than 412. + + - ``"dynamo:"`` or ``"dynamo::"``: + + The name of a DynamoDB table to use for coordination. + + The default timeout is used if not specified. This will use the same region, + credentials and endpoint as configured for S3. + + **Environment variable**: ``AWS_COPY_IF_NOT_EXISTS``. + """ + default_region: S3Regions | str + """Default region. + + **Environment variable**: ``AWS_DEFAULT_REGION``. + """ + disable_tagging: bool + """Disable tagging objects. This can be desirable if not supported by the backing store. + + **Environment variable**: ``AWS_DISABLE_TAGGING``. + """ + endpoint: str + """The endpoint for communicating with AWS S3. + + Defaults to the `region endpoint `_. 
+ + For example, this might be set to ``"http://localhost:4566"`` for testing against a + localstack instance. + + The ``endpoint`` field should be consistent with ``virtual_hosted_style_request``, + i.e. if ``virtual_hosted_style_request`` is set to ``True`` then ``endpoint`` should have + the bucket name included. + + By default, only HTTPS schemes are enabled. To connect to an HTTP endpoint, enable + ``allow_http`` in the client options. + + **Environment variables**: + + - ``AWS_ENDPOINT_URL`` + - ``AWS_ENDPOINT`` + """ + imdsv1_fallback: bool + """Fall back to ImdsV1. + + By default instance credentials will only be fetched over + `IMDSv2 `_, + as AWS recommends against having IMDSv1 enabled on EC2 instances as it is vulnerable to + `SSRF attack `_. + + However, certain deployment environments, such as those running old versions of + kube2iam, may not support IMDSv2. This option will enable automatic fallback to + using IMDSv1 if the token endpoint returns a 403 error indicating that IMDSv2 is not + supported. + + This option has no effect if not using instance credentials. + + **Environment variable**: ``AWS_IMDSV1_FALLBACK``. + """ + metadata_endpoint: str + """Set the instance metadata endpoint, used primarily within AWS EC2. + + This defaults to the IPv4 endpoint: ``http://169.254.169.254``. One can alternatively + use the IPv6 endpoint ``http://fd00:ec2::254``. + + **Environment variable**: ``AWS_METADATA_ENDPOINT``. + """ + region: S3Regions | str + """The region, defaults to ``us-east-1`` + + **Environment variable**: ``AWS_REGION``. + """ + request_payer: bool + """If ``True``, enable operations on requester-pays buckets. + + https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html + + **Environment variable**: ``AWS_REQUEST_PAYER``. + """ + s3_express: bool + """Enable Support for S3 Express One Zone. + + **Environment variable**: ``AWS_S3_EXPRESS``. + """ + secret_access_key: str + """Secret Access Key.
+ + **Environment variable**: ``AWS_SECRET_ACCESS_KEY``. + """ + server_side_encryption: S3EncryptionAlgorithm | str + """Type of encryption to use. + + If set, must be one of: + + - ``"AES256"`` (SSE-S3) + - ``"aws:kms"`` (SSE-KMS) + - ``"aws:kms:dsse"`` (DSSE-KMS) + - ``"sse-c"`` + + **Environment variable**: ``AWS_SERVER_SIDE_ENCRYPTION``. + """ + session_token: str + """Token to use for requests (passed to underlying provider). + + **Environment variables**: + + - ``AWS_SESSION_TOKEN`` + - ``AWS_TOKEN`` + """ + skip_signature: bool + """If ``True``, S3Store will not fetch credentials and will not sign requests. + + This can be useful when interacting with public S3 buckets that deny authorized requests. + + **Environment variable**: ``AWS_SKIP_SIGNATURE``. + """ + sse_bucket_key_enabled: bool + """Set whether to enable bucket key for server side encryption. + + This overrides the bucket default setting for bucket keys. + + - When ``False``, each object is encrypted with a unique data key. + - When ``True``, a single data key is used for the entire bucket, + reducing overhead of encryption. + + **Environment variable**: ``AWS_SSE_BUCKET_KEY_ENABLED``. + """ + sse_customer_key_base64: str + """ + The base64 encoded, 256-bit customer encryption key to use for server-side + encryption. If set, the server side encryption config value must be ``"sse-c"``. + + **Environment variable**: ``AWS_SSE_CUSTOMER_KEY_BASE64``. + """ + sse_kms_key_id: str + """ + The KMS key ID to use for server-side encryption. + + If set, the server side encryption config value must be ``"aws:kms"`` or ``"aws:kms:dsse"``. + + **Environment variable**: ``AWS_SSE_KMS_KEY_ID``. + """ + unsigned_payload: bool + """Avoid computing payload checksum when calculating signature. + + See `unsigned payload option `_. + + - ``False`` (default): Signed payload option is used, where the checksum for the request body is computed + and included when constructing a canonical request. 
+ - ``True``: Unsigned payload option is used. ``UNSIGNED-PAYLOAD`` literal is included when constructing a + canonical request, + + **Environment variable**: ``AWS_UNSIGNED_PAYLOAD``. + """ + virtual_hosted_style_request: bool + """If virtual hosted style request has to be used. + + If ``virtual_hosted_style_request`` is: + + - ``False`` (default): Path style request is used + - ``True``: Virtual hosted style request is used + + If the ``endpoint`` is provided then it should be consistent with + ``virtual_hosted_style_request``. i.e. if ``virtual_hosted_style_request`` is set to + ``True`` then ``endpoint`` should have bucket name included. + + **Environment variable**: ``AWS_VIRTUAL_HOSTED_STYLE_REQUEST``. + """ + + +class S3Credential(TypedDict): + """An S3 credential.""" + + access_key_id: str + """AWS access key ID.""" + + secret_access_key: str + """AWS secret access key""" + + token: NotRequired[str | None] + """AWS token.""" + + expires_at: datetime | None + """Expiry datetime of credential. The datetime should have time zone set. + + If None, the credential will never expire. + """ + + +class S3CredentialProvider(Protocol): + """A type hint for a synchronous or asynchronous callback to provide custom S3 credentials. + + This should be passed into the ``credential_provider`` parameter of ``S3Store``. + + **Examples:** + + Return static credentials that don't expire: + + .. code-block:: python + + def get_credentials() -> S3Credential: + return { + "access_key_id": "...", + "secret_access_key": "...", + "token": None, + "expires_at": None, + } + + Return static credentials that are valid for 5 minutes: + + .. code-block:: python + + from datetime import datetime, timedelta, UTC + + async def get_credentials() -> S3Credential: + return { + "access_key_id": "...", + "secret_access_key": "...", + "token": None, + "expires_at": datetime.now(UTC) + timedelta(minutes=5), + } + + A class-based credential provider with state: + + .. 
code-block:: python + + from __future__ import annotations + + from typing import TYPE_CHECKING + + import boto3 + import botocore.credentials + + if TYPE_CHECKING: + from vortex.store import S3Credential + + + class Boto3CredentialProvider: + credentials: botocore.credentials.Credentials + + def __init__(self, session: boto3.session.Session) -> None: + credentials = session.get_credentials() + if credentials is None: + raise ValueError("Received None from session.get_credentials") + + self.credentials = credentials + + def __call__(self) -> S3Credential: + frozen_credentials = self.credentials.get_frozen_credentials() + return { + "access_key_id": frozen_credentials.access_key, + "secret_access_key": frozen_credentials.secret_key, + "token": frozen_credentials.token, + "expires_at": None, + } + + """ + + def __call__(self) -> S3Credential | Coroutine[Any, Any, S3Credential]: # pyright: ignore[reportExplicitAny] + """Return an ``S3Credential``.""" + ... + + +class S3Store(_store.S3Store): + """Interface to an Amazon S3 bucket. + + All constructors will check for environment variables. Refer to + :class:`~vortex.store.S3Config` for valid environment variables. + + **Examples**: + + **Using requester-pays buckets**: + + Pass ``request_payer=True`` as a keyword argument or have ``AWS_REQUEST_PAYER=True`` + set in the environment. + + **Anonymous requests**: + + Pass ``skip_signature=True`` as a keyword argument or have ``AWS_SKIP_SIGNATURE=True`` + set in the environment. + """ + + @override + def __new__( + cls, + bucket: str | None = None, + *, + prefix: str | None = None, + config: S3Config | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: S3CredentialProvider | None = None, + **kwargs: Unpack[S3Config], # pyright: ignore[reportGeneralTypeIssues] + ) -> Self: + """Create a new S3Store. + + Args: + bucket: The AWS bucket to use.
+ + Keyword Args: + prefix: A prefix within the bucket to use for all operations. + config: AWS configuration. Values in this config will override values inferred from the + environment. Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. + credential_provider: A callback to provide custom S3 credentials. + kwargs: AWS configuration values. Supports the same values as ``config``, but as named keyword + args. + + Returns: + S3Store + + """ + return super().__new__( # pyright: ignore[reportUnknownVariableType] + cls, + bucket, + prefix=prefix, + config=config, + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, # pyright: ignore[reportCallIssue] bucket appears in both S3Config and explicitly above + ) + + @override + @classmethod + def from_url( + cls, + url: str, + *, + config: S3Config | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: S3CredentialProvider | None = None, + **kwargs: Unpack[S3Config], + ) -> Self: + """Parse available connection info from a well-known storage URL. + + Any path on the URL will be assigned as the ``prefix`` for the store. So if you + pass ``s3://bucket/path/to/directory``, the store will be created with a prefix of + ``path/to/directory``, and all further operations will use paths relative to that + prefix. + + The supported url schemes are: + + - ``s3:///`` + - ``s3a:///`` + - ``https://s3..amazonaws.com/`` + - ``https://.s3..amazonaws.com`` + - ``https://ACCOUNT_ID.r2.cloudflarestorage.com/bucket`` + + Args: + url: well-known storage URL. + + Keyword Args: + config: AWS Configuration. Values in this config will override values inferred from the url. + Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. 
+ credential_provider: A callback to provide custom S3 credentials. + kwargs: AWS configuration values. Supports the same values as ``config``, but as named keyword + args. + + + Returns: + S3Store + + """ + return super().from_url( + url, + config=config, + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, + ) + + @override + def __eq__(self, value: object) -> bool: + return super().__eq__(value) + + @override + def __getnewargs_ex__(self): # pyright: ignore[reportUnknownParameterType] + return super().__getnewargs_ex__() # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + + @property + @override + def prefix(self) -> str | None: + """Get the prefix applied to all operations in this store, if any.""" + return super().prefix + + @property + @override + def config(self) -> S3Config: + """Get the underlying S3 config parameters.""" + return super().config + + @property + @override + def client_options(self) -> ClientConfig | None: + """Get the store's client configuration.""" + return super().client_options + + @property + @override + def credential_provider(self) -> S3CredentialProvider | None: + """Get the store's credential provider.""" + return super().credential_provider + + @property + @override + def retry_config(self) -> RetryConfig | None: + """Get the store's retry configuration.""" + return super().retry_config diff --git a/vortex-python/python/vortex/store/_azure.py b/vortex-python/python/vortex/store/_azure.py new file mode 100644 index 00000000000..1b3ec1b4e52 --- /dev/null +++ b/vortex-python/python/vortex/store/_azure.py @@ -0,0 +1,407 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from collections.abc import Coroutine +from datetime import datetime +from typing import Any, Protocol, Self, TypeAlias, TypedDict, Unpack + +from typing_extensions import override + +from .._lib import store as _store # pyright:
ignore[reportMissingModuleSource] +from ._client import ClientConfig +from ._retry import RetryConfig + + +class AzureConfig(TypedDict, total=False): + """Configuration parameters for AzureStore.""" + + account_name: str + """The name of the azure storage account. (Required.) + + **Environment variable**: ``AZURE_STORAGE_ACCOUNT_NAME``. + """ + account_key: str + """Master key for accessing storage account. + + **Environment variables**: + + - ``AZURE_STORAGE_ACCOUNT_KEY`` + - ``AZURE_STORAGE_ACCESS_KEY`` + - ``AZURE_STORAGE_MASTER_KEY`` + """ + client_id: str + """The client id for use in client secret or k8s federated credential flow. + + **Environment variables**: + + - ``AZURE_STORAGE_CLIENT_ID`` + - ``AZURE_CLIENT_ID`` + """ + client_secret: str + """The client secret for use in client secret flow. + + **Environment variables**: + + - ``AZURE_STORAGE_CLIENT_SECRET`` + - ``AZURE_CLIENT_SECRET`` + """ + tenant_id: str + """The tenant id for use in client secret or k8s federated credential flow. + + **Environment variables**: + + - ``AZURE_STORAGE_TENANT_ID`` + - ``AZURE_STORAGE_AUTHORITY_ID`` + - ``AZURE_TENANT_ID`` + - ``AZURE_AUTHORITY_ID`` + """ + authority_host: str + """Sets an alternative authority host for OAuth based authorization. + + Defaults to ``https://login.microsoftonline.com``. + + Common hosts for azure clouds are: + + - Azure China: ``"https://login.chinacloudapi.cn"`` + - Azure Germany: ``"https://login.microsoftonline.de"`` + - Azure Government: ``"https://login.microsoftonline.us"`` + - Azure Public: ``"https://login.microsoftonline.com"`` + + **Environment variables**: + + - ``AZURE_STORAGE_AUTHORITY_HOST`` + - ``AZURE_AUTHORITY_HOST`` + """ + sas_key: str + """ + Shared access signature. + + The signature is expected to be percent-encoded, much like they are provided in + the azure storage explorer or azure portal. 
+ + **Environment variables**: + + - ``AZURE_STORAGE_SAS_KEY`` + - ``AZURE_STORAGE_SAS_TOKEN`` + """ + token: str + """A static bearer token to be used for authorizing requests. + + **Environment variable**: ``AZURE_STORAGE_TOKEN``. + """ + use_emulator: bool + """Set if the Azure emulator should be used (defaults to ``False``). + + **Environment variable**: ``AZURE_STORAGE_USE_EMULATOR``. + """ + use_fabric_endpoint: bool + """Set if Microsoft Fabric url scheme should be used (defaults to ``False``). + + When disabled the url scheme used is ``https://{account}.blob.core.windows.net``. + When enabled the url scheme used is ``https://{account}.dfs.fabric.microsoft.com``. + + .. note:: + + ``endpoint`` will take precedence over this option. + """ + endpoint: str + """Override the endpoint used to communicate with blob storage. + + Defaults to ``https://{account}.blob.core.windows.net``. + + By default, only HTTPS schemes are enabled. To connect to an HTTP endpoint, enable + ``allow_http`` in the client options. + + **Environment variables**: + + - ``AZURE_STORAGE_ENDPOINT`` + - ``AZURE_ENDPOINT`` + """ + msi_endpoint: str + """Endpoint to request an IMDS managed identity token. + + **Environment variables**: + + - ``AZURE_MSI_ENDPOINT`` + - ``AZURE_IDENTITY_ENDPOINT`` + """ + object_id: str + """Object id for use with managed identity authentication. + + **Environment variable**: ``AZURE_OBJECT_ID``. + """ + msi_resource_id: str + """Msi resource id for use with managed identity authentication. + + **Environment variable**: ``AZURE_MSI_RESOURCE_ID``. + """ + federated_token_file: str + """Sets a file path for acquiring azure federated identity token in k8s. + + Requires ``client_id`` and ``tenant_id`` to be set. + + **Environment variable**: ``AZURE_FEDERATED_TOKEN_FILE``. + """ + use_azure_cli: bool + """Set if the Azure Cli should be used for acquiring access token. + + . + + **Environment variable**: ``AZURE_USE_AZURE_CLI``.
+ """ + skip_signature: bool + """If enabled, ``AzureStore`` will not fetch credentials and will not sign requests. + + This can be useful when interacting with public containers. + + **Environment variable**: ``AZURE_SKIP_SIGNATURE``. + """ + container_name: str + """Container name. + + **Environment variable**: ``AZURE_CONTAINER_NAME``. + """ + disable_tagging: bool + """If set to ``True`` will ignore any tags provided to uploads. + + **Environment variable**: ``AZURE_DISABLE_TAGGING``. + """ + fabric_token_service_url: str + """Service URL for Fabric OAuth2 authentication. + + **Environment variable**: ``AZURE_FABRIC_TOKEN_SERVICE_URL``. + """ + fabric_workload_host: str + """Workload host for Fabric OAuth2 authentication. + + **Environment variable**: ``AZURE_FABRIC_WORKLOAD_HOST``. + """ + fabric_session_token: str + """Session token for Fabric OAuth2 authentication. + + **Environment variable**: ``AZURE_FABRIC_SESSION_TOKEN``. + """ + fabric_cluster_identifier: str + """Cluster identifier for Fabric OAuth2 authentication. + + **Environment variable**: ``AZURE_FABRIC_CLUSTER_IDENTIFIER``. + """ + + +class AzureAccessKey(TypedDict): + """A shared Azure Storage Account Key. + + See `Authorize with Shared Key `_. + + """ + + access_key: str + """Access key value.""" + + expires_at: datetime | None + """Expiry datetime of credential. The datetime should have time zone set. + + If None, the credential will never expire. + """ + + +class AzureSASToken(TypedDict): + """A shared access signature. + + See `Shared Access Signatures `_. + + """ + + sas_token: str | list[tuple[str, str]] + """SAS token.""" + + expires_at: datetime | None + """Expiry datetime of credential. The datetime should have time zone set. + + If None, the credential will never expire. + """ + + +class AzureBearerToken(TypedDict): + """An authorization token. + + See `Authorize with Azure AD `_. 
+ + """ + + token: str + """Bearer token.""" + + expires_at: datetime | None + """Expiry datetime of credential. The datetime should have time zone set. + + If None, the credential will never expire. + """ + + +AzureCredential: TypeAlias = AzureAccessKey | AzureSASToken | AzureBearerToken +"""A type alias for supported azure credentials to be returned from ``AzureCredentialProvider``. + +""" + + +class AzureCredentialProvider(Protocol): + """A type hint for a synchronous or asynchronous callback to provide custom Azure credentials. + + This should be passed into the ``credential_provider`` parameter of ``AzureStore``. + + """ + + def __call__(self) -> AzureCredential | Coroutine[Any, Any, AzureCredential]: # pyright: ignore[reportExplicitAny] + """Return an ``AzureCredential``.""" + ... + + +class AzureStore(_store.AzureStore): + """Interface to a Microsoft Azure Blob Storage container. + + All constructors will check for environment variables. Refer to + :class:`~vortex.store.AzureConfig` for valid environment variables. + """ + + def __new__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "container_name" + cls, + container_name: str | None = None, + *, + prefix: str | None = None, + config: AzureConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: AzureCredentialProvider | None = None, + **kwargs: Unpack[AzureConfig], # pyright: ignore[reportGeneralTypeIssues] + ) -> Self: + """Construct a new AzureStore. + + Args: + container_name: the name of the container. + + Keyword Args: + prefix: A prefix within the container to use for all operations. + config: Azure Configuration. Values in this config will override values inferred from + the environment. Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. + credential_provider: A callback to provide custom Azure credentials.
+ kwargs: Azure configuration values. Supports the same values as ``config``, but as named + keyword args. + + Returns: + AzureStore + + """ + return super().__new__( # pyright: ignore[reportUnknownVariableType] + cls, + container_name, + prefix=prefix, + config=config, + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, # pyright: ignore[reportCallIssue] + ) + + @override + @classmethod + def from_url( + cls, + url: str, + *, + prefix: str | None = None, + config: AzureConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: AzureCredentialProvider | None = None, + **kwargs: Unpack[AzureConfig], + ) -> Self: + """Construct a new AzureStore with values populated from a well-known storage URL. + + Any path on the URL will be assigned as the ``prefix`` for the store. So if you + pass ``https://.blob.core.windows.net//path/to/directory``, + the store will be created with a prefix of ``path/to/directory``, and all further + operations will use paths relative to that prefix. + + The supported url schemes are: + + - ``abfs[s]:///`` (according to `fsspec `_) + - ``abfs[s]://@.dfs.core.windows.net/`` + - ``abfs[s]://@.dfs.fabric.microsoft.com/`` + - ``az:///`` (according to `fsspec `_) + - ``adl:///`` (according to `fsspec `_) + - ``azure:///`` (custom) + - ``https://.dfs.core.windows.net`` + - ``https://.blob.core.windows.net`` + - ``https://.blob.core.windows.net/`` + - ``https://.dfs.fabric.microsoft.com`` + - ``https://.dfs.fabric.microsoft.com/`` + - ``https://.blob.fabric.microsoft.com`` + - ``https://.blob.fabric.microsoft.com/`` + + Args: + url: well-known storage URL. + + Keyword Args: + prefix: A prefix within the bucket to use for all operations. + config: Azure Configuration. Values in this config will override values inferred from the + url. Defaults to None. + client_options: HTTP Client options. Defaults to None. 
+ retry_config: Retry configuration. Defaults to None. + credential_provider: A callback to provide custom Azure credentials. + kwargs: Azure configuration values. Supports the same values as ``config``, but as named keyword + args. + + Returns: + AzureStore + + """ + return super().from_url( + url, + prefix=prefix, + config=config, + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, + ) + + @override + def __eq__(self, value: object) -> bool: + return super().__eq__(value) + + @override + def __getnewargs_ex__(self): # pyright: ignore[reportUnknownParameterType] + return super().__getnewargs_ex__() # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + + @property + @override + def prefix(self) -> str | None: + """Get the prefix applied to all operations in this store, if any.""" + return super().prefix + + @property + @override + def config(self) -> AzureConfig: + """Get the underlying Azure config parameters.""" + return super().config + + @property + @override + def client_options(self) -> ClientConfig | None: + """Get the store's client configuration.""" + return super().client_options + + @property + @override + def credential_provider(self) -> AzureCredentialProvider | None: + """Get the store's credential provider.""" + return super().credential_provider + + @property + @override + def retry_config(self) -> RetryConfig | None: + """Get the store's retry configuration.""" + return super().retry_config diff --git a/vortex-python/python/vortex/store/_client.py b/vortex-python/python/vortex/store/_client.py new file mode 100644 index 00000000000..9efe2055c40 --- /dev/null +++ b/vortex-python/python/vortex/store/_client.py @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from datetime import timedelta +from typing import TypedDict + + +class ClientConfig(TypedDict, total=False): + """HTTP client configuration. 
+ + For timeout values (``connect_timeout``, ``http2_keep_alive_timeout``, + ``pool_idle_timeout``, and ``timeout``), values can either be Python ``timedelta`` + objects, or they can be "human-readable duration strings". + + The human-readable duration string is a concatenation of time spans. Where each time + span is an integer number and a suffix. Supported suffixes: + + - ``nsec``, ``ns`` -- nanoseconds + - ``usec``, ``us`` -- microseconds + - ``msec``, ``ms`` -- milliseconds + - ``seconds``, ``second``, ``sec``, ``s`` + - ``minutes``, ``minute``, ``min``, ``m`` + - ``hours``, ``hour``, ``hr``, ``h`` + - ``days``, ``day``, ``d`` + - ``weeks``, ``week``, ``w`` + - ``months``, ``month``, ``M`` -- defined as 30.44 days + - ``years``, ``year``, ``y`` -- defined as 365.25 days + + For example: + + - ``"2h 37min"`` + - ``"32ms"`` + + """ + + allow_http: bool + """Allow non-TLS, i.e. non-HTTPS connections.""" + allow_invalid_certificates: bool + """Skip certificate validation on https connections. + + .. warning:: + + You should think very carefully before using this method. If + invalid certificates are trusted, *any* certificate for *any* site + will be trusted for use. This includes expired certificates. This + introduces significant vulnerabilities, and should only be used + as a last resort or for testing. 
+ """ + connect_timeout: str | timedelta + """Timeout for only the connect phase of a Client.""" + default_content_type: str + """Default ``CONTENT_TYPE`` for uploads.""" + default_headers: dict[str, str] | dict[str, bytes] + """Default headers to be sent with each request.""" + http1_only: bool + """Only use http1 connections.""" + http2_keep_alive_interval: str + """Interval at which HTTP2 Ping frames should be sent to keep a connection alive.""" + http2_keep_alive_timeout: str | timedelta + """Timeout for receiving an acknowledgement of the keep-alive ping.""" + http2_keep_alive_while_idle: str + """Enable HTTP2 keep alive pings for idle connections.""" + http2_only: bool + """Only use http2 connections.""" + pool_idle_timeout: str | timedelta + """The pool max idle timeout. + + This is the length of time an idle connection will be kept alive. + """ + pool_max_idle_per_host: str + """Maximum number of idle connections per host.""" + proxy_url: str + """HTTP proxy to use for requests.""" + timeout: str | timedelta + """Request timeout. + + The timeout is applied from when the request starts connecting until the + response body has finished. 
+ """ + user_agent: str + """User-Agent header to be used by this client.""" diff --git a/vortex-python/python/vortex/store/_gcs.py b/vortex-python/python/vortex/store/_gcs.py new file mode 100644 index 00000000000..e1512434093 --- /dev/null +++ b/vortex-python/python/vortex/store/_gcs.py @@ -0,0 +1,239 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from collections.abc import Coroutine +from datetime import datetime +from typing import Any, Protocol, Self, TypedDict, Unpack + +from typing_extensions import override + +from .._lib import store as _store # pyright: ignore[reportMissingModuleSource] +from ._client import ClientConfig +from ._retry import RetryConfig + + +class GCSConfig(TypedDict, total=False): + """Configuration parameters for GCSStore.""" + + service_account: str + """Path to the service account file. + + This or ``service_account_key`` must be set. + + Example value ``"/tmp/gcs.json"``. Example contents of ``gcs.json``: + + .. code-block:: json + + { + "gcs_base_url": "https://localhost:4443", + "disable_oauth": true, + "client_email": "", + "private_key": "" + } + + **Environment variables**: + + - ``GOOGLE_SERVICE_ACCOUNT`` + - ``GOOGLE_SERVICE_ACCOUNT_PATH`` + """ + + service_account_key: str + """The serialized service account key. + + The service account must be in the JSON format. This or ``service_account`` + must be set. + + **Environment variable**: ``GOOGLE_SERVICE_ACCOUNT_KEY``. + """ + + bucket: str + """Bucket name. (required) + + **Environment variables**: + + - ``GOOGLE_BUCKET`` + - ``GOOGLE_BUCKET_NAME`` + """ + + application_credentials: str + """Application credentials path. + + See <https://cloud.google.com/docs/authentication/provide-credentials-adc>. + + **Environment variable**: ``GOOGLE_APPLICATION_CREDENTIALS``. + """ + + skip_signature: bool + """If ``True``, GCSStore will not fetch credentials and will not sign requests. + + This can be useful when interacting with public GCS buckets that deny authorized requests. 
+ + **Environment variable**: ``GOOGLE_SKIP_SIGNATURE``. + """ + + +class GCSCredential(TypedDict): + """A Google Cloud Storage Credential.""" + + token: str + """An HTTP bearer token.""" + + expires_at: datetime | None + """Expiry datetime of credential. The datetime should have time zone set. + + If None, the credential will never expire. + """ + + +class GCSCredentialProvider(Protocol): + """A type hint for a synchronous or asynchronous callback to provide custom Google Cloud Storage credentials. + + This should be passed into the ``credential_provider`` parameter of ``GCSStore``. + + """ + + def __call__(self) -> GCSCredential | Coroutine[Any, Any, GCSCredential]: # pyright: ignore[reportExplicitAny] + """Return a ``GCSCredential``.""" + ... + + +class GCSStore(_store.GCSStore): + """Interface to Google Cloud Storage. + + All constructors will check for environment variables. Refer to + :class:`~vortex.store.GCSConfig` for valid environment variables. + + If no credentials are explicitly provided, they will be sourced from the environment + as documented + `here <https://cloud.google.com/docs/authentication/application-default-credentials>`_. + """ + + def __new__( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "bucket" + cls, + bucket: str | None = None, + *, + prefix: str | None = None, + config: GCSConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: GCSCredentialProvider | None = None, + **kwargs: Unpack[GCSConfig], # pyright: ignore[reportGeneralTypeIssues] + ) -> Self: + """Construct a new GCSStore. + + Args: + bucket: The GCS bucket to use. + + Keyword Args: + prefix: A prefix within the bucket to use for all operations. + config: GCS Configuration. Values in this config will override values inferred from the + environment. Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. 
+ credential_provider: A callback to provide custom Google credentials. + kwargs: GCS configuration values. Supports the same values as ``config``, + but as named keyword args. + + Returns: + GCSStore + + """ + return super().__new__( # pyright: ignore[reportUnknownVariableType] + cls, + bucket, + prefix=prefix, + config=config, + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, # pyright: ignore[reportCallIssue] + ) + + @override + @classmethod + def from_url( + cls, + url: str, + *, + prefix: str | None = None, + config: GCSConfig | None = None, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + credential_provider: GCSCredentialProvider | None = None, + **kwargs: Unpack[GCSConfig], + ) -> Self: + """Construct a new GCSStore with values populated from a well-known storage URL. + + Any path on the URL will be assigned as the ``prefix`` for the store. So if you + pass ``gs://<bucket>/path/to/directory``, the store will be created with a prefix + of ``path/to/directory``, and all further operations will use paths relative to + that prefix. + + The supported url schemes are: + + - ``gs://<bucket>/<path>`` + + Args: + url: well-known storage URL. + + Keyword Args: + prefix: A prefix within the bucket to use for all operations. + config: GCS Configuration. Values in this config will override values inferred from the + url. Defaults to None. + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. + credential_provider: A callback to provide custom Google credentials. + kwargs: GCS configuration values. Supports the same values as ``config``, but as named keyword + args. 
+ + Returns: + GCSStore + + """ + return super().from_url( # type: ignore[misc] # Overlap between argument names and ** TypedDict items: "bucket" + url, + prefix=prefix, + config=config, + client_options=client_options, + retry_config=retry_config, + credential_provider=credential_provider, + **kwargs, + ) + + @override + def __eq__(self, value: object) -> bool: + return super().__eq__(value) + + @override + def __getnewargs_ex__(self): # pyright: ignore[reportUnknownParameterType] + return super().__getnewargs_ex__() # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + + @property + @override + def prefix(self) -> str | None: + """Get the prefix applied to all operations in this store, if any.""" + return super().prefix + + @property + @override + def config(self) -> GCSConfig: + """Get the underlying GCS config parameters.""" + return super().config + + @property + @override + def client_options(self) -> ClientConfig | None: + """Get the store's client configuration.""" + return super().client_options + + @property + @override + def credential_provider(self) -> GCSCredentialProvider | None: + """Get the store's credential provider.""" + return super().credential_provider + + @property + @override + def retry_config(self) -> RetryConfig | None: + """Get the store's retry configuration.""" + return super().retry_config diff --git a/vortex-python/python/vortex/store/_http.py b/vortex-python/python/vortex/store/_http.py new file mode 100644 index 00000000000..57d46bb5ebb --- /dev/null +++ b/vortex-python/python/vortex/store/_http.py @@ -0,0 +1,82 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from typing import Self + +from typing_extensions import override + +from .._lib import store as _store # pyright: ignore[reportMissingModuleSource] +from ._client import ClientConfig +from ._retry import RetryConfig + + +class HTTPStore(_store.HTTPStore): + """Configure a connection to a generic HTTP 
server.""" + + def __new__( + cls, + url: str, + *, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + ): + """Construct a new HTTPStore from a URL. + + Any path on the URL will be assigned as the ``prefix`` for the store. So if you + pass ``https://example.com/path/to/directory``, the store will be created with a + prefix of ``path/to/directory``, and all further operations will use paths + relative to that prefix. + + Args: + url: The base URL to use for the store. + + Keyword Args: + client_options: HTTP Client options. Defaults to None. + retry_config: Retry configuration. Defaults to None. + + Returns: + HTTPStore + + """ + return super().__new__(cls, url, client_options=client_options, retry_config=retry_config) + + @override + @classmethod + def from_url( + cls, + url: str, + *, + client_options: ClientConfig | None = None, + retry_config: RetryConfig | None = None, + ) -> Self: + """Construct a new HTTPStore from a URL. + + This is an alias of the :class:`~vortex.store.HTTPStore` constructor. 
+ """ + return super().from_url(url, client_options=client_options, retry_config=retry_config) + + @override + def __eq__(self, value: object) -> bool: + return super().__eq__(value) + + @override + def __getnewargs_ex__(self): # pyright: ignore[reportUnknownParameterType] + return super().__getnewargs_ex__() # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType] + + @property + @override + def url(self) -> str: + """Get the base url of this store.""" + return super().url + + @property + @override + def client_options(self) -> ClientConfig | None: + """Get the store's client configuration.""" + return super().client_options + + @property + @override + def retry_config(self) -> RetryConfig | None: + """Get the store's retry configuration.""" + return super().retry_config diff --git a/vortex-python/python/vortex/store/_local.py b/vortex-python/python/vortex/store/_local.py new file mode 100644 index 00000000000..891c8e968b4 --- /dev/null +++ b/vortex-python/python/vortex/store/_local.py @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from pathlib import Path +from typing import Self + +from typing_extensions import override + +from .._lib import store as _store # pyright: ignore[reportMissingModuleSource] + + +class LocalStore(_store.LocalStore): + """An ObjectStore interface to local filesystem storage. + + Create a local store with an optional directory prefix:: + + from pathlib import Path + + store = LocalStore() + store = LocalStore(prefix="/path/to/directory") + store = LocalStore(prefix=Path(".")) + """ + + def __new__( + cls, + prefix: str | Path | None = None, + *, + automatic_cleanup: bool = False, + mkdir: bool = False, + ) -> Self: + """Create a new LocalStore. + + Args: + prefix: Use the specified prefix applied to all paths. Defaults to ``None``. + + Keyword Args: + automatic_cleanup: if ``True``, enables automatic cleanup of empty directories + when deleting files. 
Defaults to False. + mkdir: if ``True`` and ``prefix`` is not ``None``, the directory at ``prefix`` will + attempt to be created. Note that this root directory will not be cleaned + up, even if ``automatic_cleanup`` is ``True``. + + """ + return super().__new__(cls, prefix, automatic_cleanup=automatic_cleanup, mkdir=mkdir) + + @classmethod + @override + def from_url( + cls, + url: str, + *, + automatic_cleanup: bool = False, + mkdir: bool = False, + ) -> Self: + """Construct a new LocalStore from a ``file://`` URL. + + **Examples:** + + Construct a new store pointing to the root of your filesystem:: + + url = "file:///" + store = LocalStore.from_url(url) + + Construct a new store with a directory prefix:: + + url = "file:///Users/kyle/" + store = LocalStore.from_url(url) + + """ + return super().from_url(url, automatic_cleanup=automatic_cleanup, mkdir=mkdir) + + @override + def __eq__(self, value: object, /) -> bool: + return super().__eq__(value) + + @override + def __getnewargs_ex__(self) -> tuple[tuple[()], dict[str, object]]: + return super().__getnewargs_ex__() + + @property + @override + def prefix(self) -> Path | None: + """Get the prefix applied to all operations in this store, if any.""" + return super().prefix diff --git a/vortex-python/python/vortex/store/_memory.py b/vortex-python/python/vortex/store/_memory.py new file mode 100644 index 00000000000..763424732eb --- /dev/null +++ b/vortex-python/python/vortex/store/_memory.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from .._lib import store as _store # pyright: ignore[reportMissingModuleSource] + + +class MemoryStore(_store.MemoryStore): + """A fully in-memory implementation of ObjectStore. 
+ + Create a new in-memory store:: + + store = MemoryStore() + """ + + def __new__(cls): + return super().__new__(cls) diff --git a/vortex-python/python/vortex/store/_retry.py b/vortex-python/python/vortex/store/_retry.py new file mode 100644 index 00000000000..41eb18d56f7 --- /dev/null +++ b/vortex-python/python/vortex/store/_retry.py @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: MIT +# SPDX-FileCopyrightText: Copyright (c) 2024 Development Seed + +from datetime import timedelta +from typing import TypedDict + + +class BackoffConfig(TypedDict, total=False): + """Exponential backoff with jitter. + + See `Exponential Backoff and Jitter <https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/>`_. + + """ + + init_backoff: timedelta + """The initial backoff duration. + + Defaults to 100 milliseconds. + """ + + max_backoff: timedelta + """The maximum backoff duration. + + Defaults to 15 seconds. + """ + + base: int | float + """The base of the exponential to use. + + Defaults to ``2``. + """ + + +class RetryConfig(TypedDict, total=False): + """The configuration for how to respond to request errors. + + The following categories of error will be retried: + + * 5xx server errors + * Connection errors + * Dropped connections + * Timeouts for `safe <https://datatracker.ietf.org/doc/html/rfc7231#section-4.2.1>`_ / read-only requests + + Requests will be retried up to some limit, using exponential + backoff with jitter. See :class:`~vortex.store.BackoffConfig` for + more information. + """ + + backoff: BackoffConfig + """The backoff configuration. + + Defaults to the values listed above if not provided. + """ + + max_retries: int + """ + The maximum number of times to retry a request. + + Set to 0 to disable retries. + + Defaults to 10. + """ + + retry_timeout: timedelta + """ + The maximum length of time from the initial request + after which no further retries will be attempted. + + This not only bounds the length of time before a server + error will be surfaced to the application, but also bounds + the length of time a request's credentials must remain valid. 
+ + As requests are retried without renewing credentials or + regenerating request payloads, this number should be kept + below 5 minutes to avoid errors due to expired credentials + and/or request payloads. + + Defaults to 3 minutes. + """