diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e5d367958dd..bc7fe3cd683 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -69,10 +69,10 @@ jobs: - conda-python-3.12-no-numpy include: - name: conda-python-docs - cache: conda-python-3.10 + cache: conda-python-3.11 image: conda-python-docs - title: AMD64 Conda Python 3.10 Sphinx & Numpydoc - python: "3.10" + title: AMD64 Conda Python 3.11 Sphinx & Numpydoc + python: "3.11" - name: conda-python-3.11-nopandas cache: conda-python-3.11 image: conda-python diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 279ec5dc61d..22a3114fdd2 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -684,7 +684,7 @@ When using :class:`~.DictionaryArray` with pandas, the analogue is 6 NaN 7 baz dtype: category - Categories (3, object): ['foo', 'bar', 'baz'] + Categories (3, str): ['foo', 'bar', 'baz'] .. _data.record_batch: diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst index 9b4458c7488..8f963639689 100644 --- a/docs/source/python/ipc.rst +++ b/docs/source/python/ipc.rst @@ -160,12 +160,12 @@ DataFrame output: >>> with pa.ipc.open_file(buf) as reader: ... df = reader.read_pandas() >>> df[:5] - f0 f1 f2 - 0 1 foo True - 1 2 bar None - 2 3 baz False - 3 4 None True - 4 1 foo True + f0 f1 f2 + 0 1 foo True + 1 2 bar None + 2 3 baz False + 3 4 NaN True + 4 1 foo True Efficiently Writing and Reading Arrow Data ------------------------------------------ diff --git a/docs/source/python/pandas.rst b/docs/source/python/pandas.rst index 9999a5b7793..7aacaaff60c 100644 --- a/docs/source/python/pandas.rst +++ b/docs/source/python/pandas.rst @@ -170,7 +170,7 @@ number of possible values. >>> df = pd.DataFrame({"cat": pd.Categorical(["a", "b", "c", "a", "b", "c"])}) >>> df.cat.dtype.categories - Index(['a', 'b', 'c'], dtype='object') + Index(['a', 'b', 'c'], dtype='str') >>> df cat 0 a @@ -182,7 +182,7 @@ number of possible values. >>> table = pa.Table.from_pandas(df) >>> table pyarrow.Table - cat: dictionary + cat: dictionary ---- cat: [ -- dictionary: ["a","b","c"] -- indices: @@ -196,7 +196,7 @@ same categories of the Pandas DataFrame. >>> column = table[0] >>> chunk = column.chunk(0) >>> chunk.dictionary - + [ "a", "b", @@ -224,7 +224,7 @@ use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow >>> df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="h", periods=3)}) >>> df.dtypes - datetime datetime64[ns, UTC] + datetime datetime64[us, UTC] dtype: object >>> df datetime @@ -234,9 +234,9 @@ use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow >>> table = pa.Table.from_pandas(df) >>> table pyarrow.Table - datetime: timestamp[ns, tz=UTC] + datetime: timestamp[us, tz=UTC] ---- - datetime: [[2020-01-01 00:00:00.000000000Z,...,2020-01-01 02:00:00.000000000Z]] + datetime: [[2020-01-01 00:00:00.000000Z,2020-01-01 01:00:00.000000Z,2020-01-01 02:00:00.000000Z]] In this example the Pandas Timestamp is time zone aware (``UTC`` on this case), and this information is used to create the Arrow diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst index 638df963cdf..30a84b3dc6d 100644 --- a/docs/source/python/parquet.rst +++ b/docs/source/python/parquet.rst @@ -238,9 +238,9 @@ concatenate them into a single table. You can read individual row groups with >>> parquet_file.read_row_group(0) pyarrow.Table one: double - two: string + two: large_string three: bool - __index_level_0__: string + __index_level_0__: large_string ---- one: [[-1,null,2.5]] two: [["foo","bar","baz"]] @@ -352,7 +352,7 @@ and improved performance for columns with many repeated string values. one: double two: dictionary three: bool - __index_level_0__: string + __index_level_0__: large_string ---- one: [[-1,null,2.5]] two: [ -- dictionary: