apache · renato2099 · Dec 13, 2025 · Dec 14, 2025 · Dec 14, 2025 · Jan 4, 2026
diff --git a/docs/source/user-guide/common-operations/joins.rst b/docs/source/user-guide/common-operations/joins.rst
@@ -78,6 +78,10 @@ Full Join
 A full join merges rows from two tables based on a related column, returning all rows from both tables, even if there
 is no match. Unmatched rows will have null values.
 
+The ``drop_duplicate_keys`` option is ignored for full joins because the left and right join key columns may contain
+non-equivalent values (including NULLs). A full join always preserves both key columns; apply explicit post-processing
+(for example, ``coalesce``) to merge them if desired.
+
 .. ipython:: python
 
     left.join(right, left_on="customer_id", right_on="id", how="full")

diff --git a/python/datafusion/dataframe.py b/python/datafusion/dataframe.py
@@ -832,6 +832,10 @@ def join(
             drop_duplicate_keys: When True, the columns from the right DataFrame
                 that have identical names in the ``on`` fields to the left DataFrame
                 will be dropped.
+                This option is ignored when ``how="full"`` because the left and right
+                join key columns may hold non-equivalent values (including NULLs). A
+                full join always preserves both key columns; apply explicit
+                post-processing (e.g. ``coalesce``) to merge them if desired.
             join_keys: Tuple of two lists of column names to join on. [Deprecated]
 
         Returns:

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
@@ -647,6 +647,39 @@ def test_unnest_without_nulls(nested_df):
     assert result.column(1) == pa.array([7, 8, 8, 9, 9, 9])
 
 
+def test_join_full():
+    ctx = SessionContext()
+    batch = pa.RecordBatch.from_arrays(
+        [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
+        names=["num", "name"],
+    )
+    df1 = ctx.create_dataframe([[batch]], "l")
+
+    batch = pa.RecordBatch.from_arrays(
+        [pa.array([1, 3, 5]), pa.array([True, True, False])],
+        names=["num", "value"],
+    )
+    df2 = ctx.create_dataframe([[batch]], "r")
+
+    df3 = df1.join(df2, on="num", how="full")
+
+    expected = {
+        "num": [1, 3, None, 5],
+        "name": ["a", "c", "b", None],
+        "value": [True, True, None, False],
+    }
+    assert expected == df3.to_pydict()
+
+    # Show how users can merge the two key columns with explicit post-processing.
+    df4 = df3.select_exprs("coalesce(l.num, r.num) as num", "l.name", "r.value")
+    expected = {
+        "num": [1, 3, 2, 5],
+        "name": ["a", "c", "b", None],
+        "value": [True, True, None, False],
+    }
+    assert expected == df4.to_pydict()
+
+
 def test_join():
     ctx = SessionContext()
 

diff --git a/src/dataframe.rs b/src/dataframe.rs
@@ -676,7 +676,8 @@ impl PyDataFrame {
             None,
         )?;
 
-        if drop_duplicate_keys {
+        let allow_drop_duplicate_keys = drop_duplicate_keys && join_type != JoinType::Full;
+        if allow_drop_duplicate_keys {
             let mutual_keys = left_keys
                 .iter()
                 .zip(right_keys.iter())