read large sas file larger than memory in python

Antoine
09 Oct 2020
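
The snippet below reads the file in fixed-size chunks with pyreadstat and stores every column as a pandas categorical, which keeps the accumulated DataFrame far smaller in memory than the raw data.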
import pandas as pd
import pyreadstat

filename = 'foo.SAS7BDAT'
CHUNKSIZE = 50000
offset = 0

# read the first chunk; for xpt data, use pyreadstat.read_xpt()
allChunk, _ = pyreadstat.read_sas7bdat(filename, row_limit=CHUNKSIZE, row_offset=offset)
allChunk = allChunk.astype('category')

while True:
    offset += CHUNKSIZE
    chunk, _ = pyreadstat.read_sas7bdat(filename, row_limit=CHUNKSIZE, row_offset=offset)
    if chunk.empty:
        break  # an empty chunk means the entire file has been read
    chunk = chunk.astype('category')

    # give both frames a shared category set per column; without this,
    # pd.concat silently falls back to the memory-hungry object dtype
    for eachCol in chunk:
        colUnion = pd.api.types.union_categoricals([allChunk[eachCol], chunk[eachCol]])
        allChunk[eachCol] = pd.Categorical(allChunk[eachCol], categories=colUnion.categories)
        chunk[eachCol] = pd.Categorical(chunk[eachCol], categories=colUnion.categories)

    allChunk = pd.concat([allChunk, chunk])  # append the chunk to the result

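If you only need to process the data chunk by chunk rather than build one combined DataFrame, pandas' own read_sas can also iterate over the file. A minimal sketch, assuming the same foo.SAS7BDAT file; the row count here is just a stand-in for whatever per-chunk work you need:

import pandas as pd

total_rows = 0
# with chunksize set, read_sas returns an iterator of DataFrames
for chunk in pd.read_sas('foo.SAS7BDAT', format='sas7bdat', chunksize=50000):
    total_rows += len(chunk)  # replace with real per-chunk processing
print(total_rows)
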