import pandas as pd
from pyspark.sql import SparkSession

filename = '<path to file>'
spark = SparkSession.builder.appName('pandasToSpark').getOrCreate()
# Assuming the file is a CSV
pandas_df = pd.read_csv(filename)
spark_df = spark.createDataFrame(pandas_df)
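As a quick sanity check (not part of the original snippet), you can inspect the converted DataFrame with the standard `printSchema()` and `show()` methods; the column names and types are whatever pandas inferred from the CSV:

# Verify the conversion
spark_df.printSchema()   # column names/types derived from the pandas dtypes
spark_df.show(5)         # preview the first five rows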
import numpy as np
import pandas as pd

# Enable Arrow-based columnar data transfers
# (reuses the SparkSession `spark` created above; on Spark 3.0+ the key
# is "spark.sql.execution.arrow.pyspark.enabled")
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

# Generate a pandas DataFrame of random values
pdf = pd.DataFrame(np.random.rand(100, 3))

# Create a Spark DataFrame from a pandas DataFrame using Arrow
df = spark.createDataFrame(pdf)
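The Arrow setting also accelerates the reverse direction. As a small follow-up sketch (the variable name `result_pdf` is just illustrative), converting back with `toPandas()` uses the same columnar transfer path when Arrow is enabled:

# Convert the Spark DataFrame back to pandas; with Arrow enabled this
# avoids row-by-row serialization of the data
result_pdf = df.toPandas()
print(result_pdf.head())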