# PySpark 数据类型 (PySpark data types)
def get_sim_test(clear_stand_names_list, clear_names_list):
    """Return a fixed placeholder similarity result.

    Both arguments are accepted so the function can be called with two
    inputs, but neither is read: the result is constant.

    Args:
        clear_stand_names_list: Ignored.
        clear_names_list: Ignored.

    Returns:
        A new two-element list of floats, always ``[0.0, 0.9]``.
    """
    # The body must be indented under the ``def``; without the indentation
    # the definition raises IndentationError.
    return [0.0, 0.9]
# 相似度计算 (similarity computation)
# Register get_sim_test as a UDF whose declared return type is an array of floats.
udf_get_sim = F.udf(get_sim_test, ArrayType(FloatType()))

# Apply the UDF to the stand_names_cut and skunames_cut columns and store the
# result in a new 'sim_max' column.
_sim_column = udf_get_sim(xtl_data.stand_names_cut, xtl_data.skunames_cut)
xtl_data1 = xtl_data.withColumn('sim_max', _sim_column)

# Columns to display, in output order; 'sim_max' is the column added above.
_preview_columns = [
    "standard_id",
    "barndname_cn",
    "capacity",
    "pac_spec",
    "stand_ids",
    "stand_names",
    "stand_names_cut",
    "skuids",
    "skunames",
    "skunames_cut",
    "sim_max",
]
xtl_data1.select(*_preview_columns).show()