Converting to a Unix timestamp and doing basic arithmetic should do the trick:
from pyspark.sql import Row
from pyspark.sql.functions import col, unix_timestamp, round

df = sc.parallelize([
    Row(dt='1970-01-01 00:00:00'),
    Row(dt='2015-09-16 05:39:46'),
    Row(dt='2015-09-16 05:40:46'),
    Row(dt='2016-03-05 02:00:10'),
]).toDF()

## unix_timestamp converts string to Unix timestamp (bigint / long)
## in seconds. Divide by 60, round, multiply by 60 and cast
## should work just fine.
##
dt_truncated = ((round(unix_timestamp(col("dt")) / 60) * 60)
    .cast("timestamp"))

df.withColumn("dt_truncated", dt_truncated).show(10, False)

## +-------------------+---------------------+
## |dt                 |dt_truncated         |
## +-------------------+---------------------+
## |1970-01-01 00:00:00|1970-01-01 00:00:00.0|
## |2015-09-16 05:39:46|2015-09-16 05:40:00.0|
## |2015-09-16 05:40:46|2015-09-16 05:41:00.0|
## |2016-03-05 02:00:10|2016-03-05 02:00:00.0|
## +-------------------+---------------------+
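Note that round gives you the nearest minute, not the start of the minute. If you want true truncation (flooring) or a different bucket size, the same arithmetic works with floor and a different divisor. Below is a minimal sketch reusing the df built above; the column names dt_5min and dt_minute are just illustrative, and the date_trunc line assumes Spark 2.3+ where that function is available:

from pyspark.sql.functions import col, unix_timestamp, floor, date_trunc

## floor instead of round truncates to the start of the interval;
## the divisor controls the bucket size (300 seconds = 5 minutes).
dt_floor_5min = ((floor(unix_timestamp(col("dt")) / 300) * 300)
    .cast("timestamp"))

df.withColumn("dt_5min", dt_floor_5min) \
  .withColumn("dt_minute", date_trunc("minute", col("dt").cast("timestamp"))) \
  .show(10, False)

Either approach keeps everything in native Column expressions, so it stays a single Spark SQL projection with no UDF overhead.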