将 pandas 的 DataFrame 转换为 PySpark 的 DataFrame
一、
二、代码
# Coarse-recall data processing: load the candidate SKU list from a local
# TSV file, expose it to Spark SQL as the temp view "match_fs", then load the
# recall data set.
# NOTE(review): `pd`, `spark`, `StructType`/`StructField`/`StringType`,
# `get_recall_data` and `dt` are not defined in this snippet; they are assumed
# to be imported/defined earlier in the file -- confirm.

# Read every column as text so that IDs and category codes are not parsed
# as numbers.
pandas_df = pd.read_csv("./clothes_women_1356.txt", sep="\t", dtype=str)

# pandas marks empty cells as float NaN even with dtype=str. Replace them with
# None so the string columns hold SQL NULL instead of a NaN value.
pandas_df = pandas_df.astype(object).where(pandas_df.notna(), None)

# All columns are nullable strings. The schema names replace the pandas
# column labels, so this assumes the file has exactly these columns in this
# order -- TODO confirm against the data file.
match_fs_columns = [
    "item_sku_id",
    "item_first_cate_cd",
    "item_first_cate_name",
    "item_second_cate_cd",
    "item_second_cate_name",
    "item_third_cate_cd",
    "item_third_cate_name",
    "sku_name",
    "pic",
]
schema = StructType(
    [StructField(column, StringType(), True) for column in match_fs_columns]
)

spark_df = spark.createDataFrame(pandas_df, schema=schema)
# createOrReplaceTempView supersedes registerTempTable (deprecated since
# Spark 2.0).
spark_df.createOrReplaceTempView("match_fs")

xtl_df = get_recall_data(dt)
# Redistribute the recall data across 500 partitions.
xtl_df = xtl_df.repartition(500)