import pandas as pd
import pyreadstat
# Load a SAS7BDAT file piece by piece and accumulate it into a single
# DataFrame (`allChunk`) whose columns are all of categorical dtype.
#
# Each piece is converted to categoricals as soon as it is read, and the
# category sets of the accumulated frame and the new piece are aligned
# column by column before concatenation, because concatenating
# categoricals with differing categories would not keep the categorical
# dtype.
filename = 'foo.SAS7BDAT'
CHUNKSIZE = 50000  # number of rows requested per read
offset = 0         # row offset of the chunk currently being read

# Read the first chunk with the same reader and the same file as the loop
# below, so that every chunk comes from one source.
allChunk, _ = pyreadstat.read_sas7bdat(
    filename, row_limit=CHUNKSIZE, row_offset=offset
)
allChunk = allChunk.astype('category')

while True:
    offset += CHUNKSIZE
    chunk, _ = pyreadstat.read_sas7bdat(
        filename, row_limit=CHUNKSIZE, row_offset=offset
    )
    if chunk.empty:
        # An empty frame is treated as the end of the file.
        break

    # union_categoricals only accepts categorical inputs, so the freshly
    # read chunk has to be converted before it can be combined.
    chunk = chunk.astype('category')

    for eachCol in chunk:
        # Compute the combined category set for this column, then re-encode
        # both sides against it so their dtypes match exactly.
        colUnion = pd.api.types.union_categoricals(
            [allChunk[eachCol], chunk[eachCol]]
        )
        allChunk[eachCol] = pd.Categorical(
            allChunk[eachCol], categories=colUnion.categories
        )
        chunk[eachCol] = pd.Categorical(
            chunk[eachCol], categories=colUnion.categories
        )

    # Row labels are kept exactly as each chunk delivers them.
    # NOTE(review): if a unique index is required downstream, consider
    # passing ignore_index=True here - confirm with the consumers.
    allChunk = pd.concat([allChunk, chunk])