@blackbishop Improving on your answer...
import pyspark.sql.functions as F

# Assumes an active SparkSession named `spark`.
df = spark.createDataFrame(
    [([["a", "b", "c"], ["d", "e", "f"], ["g", "h", "i", "j"]],)],
    ["data"],
)
df.show(20, False)

# Explode the outer array so each inner array becomes its own row.
df = df.withColumn("data1", F.explode("data"))
df.select("data1").show()

# Find the longest inner array to know how many columns are needed.
max_size = df.select(F.max(F.size("data1"))).collect()[0][0]

# Index into each array; positions past an array's length come back as null.
df.select(
    *[F.col("data1")[i].alias(f"col_{i}") for i in range(max_size)]
).show()
+------------------------------------+
|data |
+------------------------------------+
|[[a, b, c], [d, e, f], [g, h, i, j]]|
+------------------------------------+
+------------+
| data1|
+------------+
| [a, b, c]|
| [d, e, f]|
|[g, h, i, j]|
+------------+
+-----+-----+-----+-----+
|col_0|col_1|col_2|col_3|
+-----+-----+-----+-----+
| a| b| c| null|
| d| e| f| null|
| g| h| i| j|
+-----+-----+-----+-----+
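If you also want each row to remember where its inner array sat in the outer array (useful when other columns must survive the explode), F.posexplode works the same way and additionally emits a position column. A minimal sketch along those lines; the pos/data1 names are just illustrative aliases:

import pyspark.sql.functions as F

# posexplode is a generator expression, so alias both of its
# output columns directly in the select.
df2 = df.select(F.posexplode("data").alias("pos", "data1"))

# Same widening step as above, keeping `pos` so each wide row
# can be traced back to its place in the original outer array.
max_size = df2.agg(F.max(F.size("data1"))).first()[0]
df2.select(
    "pos",
    *[F.col("data1")[i].alias(f"col_{i}") for i in range(max_size)]
).show()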