def get_sim_test(clear_stand_names_list, clear_names_list):
    """Return a fixed placeholder similarity result.

    This is a test stub: it takes two arguments so it can be called with
    two values, but it never reads either of them.

    Args:
        clear_stand_names_list: First input value; ignored.
        clear_names_list: Second input value; ignored.

    Returns:
        A new two-element list ``[0.0, 0.9]`` on every call.
    """
    return [0.0, 0.9]

# Similarity computation (相似度计算)

# Wrap get_sim_test as a UDF with a declared return type of array<float>.
# NOTE(review): F is presumably pyspark.sql.functions -- confirm against the
# file's imports, which are not visible here.
udf_get_sim = F.udf(get_sim_test, ArrayType(FloatType()))

# Add a 'sim_max' column by applying the UDF to the two *_cut columns.
xtl_data1 = xtl_data.withColumn(
    "sim_max",
    udf_get_sim(xtl_data.stand_names_cut, xtl_data.skunames_cut),
)

# Display the source columns next to the new 'sim_max' column.
xtl_data1.select(
    "standard_id",
    "barndname_cn",
    "capacity",
    "pac_spec",
    "stand_ids",
    "stand_names",
    "stand_names_cut",
    "skuids",
    "skunames",
    "skunames_cut",
    "sim_max",
).show()

# Print each row's 'sim_max' value.
# NOTE(review): assuming xtl_data1 is a Spark DataFrame, collect() pulls every
# row to the driver -- fine for a small check, reconsider for large data.
for row in xtl_data1.select("sim_max").collect():
    print("sssss:", row["sim_max"])

标签: none

添加新评论