import pandas as pd
from pyspark.sql import SparkSession

filename = '<path to file>'  # replace with the path to your file
spark = SparkSession.builder.appName('pandasToSpark').getOrCreate()
# Assuming the file is a CSV
pandas_df = pd.read_csv(filename)
spark_df = spark.createDataFrame(pandas_df)
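If the Pandas DataFrame is large, the conversion can be sped up with Apache Arrow. A minimal sketch, assuming Spark 3.x with pyarrow installed (the config key below is the Spark 3.x name):

# Enable Arrow-based columnar transfer before calling createDataFrame
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")
spark_df = spark.createDataFrame(pandas_df)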
import pandas as pd
from pyspark.sql import SparkSession

# Sample Pandas DataFrame (matches the output shown below)
pandasDF = pd.DataFrame({
    "Name": ["Scott", "Jeff", "Thomas", "Ann"],
    "Age": [50, 45, 54, 34]
})

# Create PySpark SparkSession
spark = SparkSession.builder \
    .master("local[1]") \
    .appName("SparkByExamples.com") \
    .getOrCreate()

# Create PySpark DataFrame from Pandas
sparkDF = spark.createDataFrame(pandasDF)
sparkDF.printSchema()
sparkDF.show()

# Outputs below schema & DataFrame

root
 |-- Name: string (nullable = true)
 |-- Age: long (nullable = true)

+------+---+
|  Name|Age|
+------+---+
| Scott| 50|
|  Jeff| 45|
|Thomas| 54|
|   Ann| 34|
+------+---+
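By default createDataFrame infers the schema from the Pandas dtypes, which is why the Age column above comes through as long. If you need to control the types, you can pass an explicit schema. A minimal sketch, assuming the sample pandasDF defined above:

from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Define the target Spark schema explicitly instead of relying on inference
schema = StructType([
    StructField("Name", StringType(), True),
    StructField("Age", IntegerType(), True)
])

# Convert using the explicit schema; Age is now integer instead of long
sparkDF2 = spark.createDataFrame(pandasDF, schema=schema)
sparkDF2.printSchema()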