import pandas as pd
import numpy as np

# Load a random subset of rows from a large CSV by telling pandas which
# physical lines to skip while parsing, instead of reading every row.
filename = 'hugedatafile.csv'
nlinesfile = 10000000  # assumed number of data lines in the file -- must match the real file
nlinesrandomsample = 10000  # number of data lines to keep

# Candidate indices start at 1, so line 0 (the header row under read_csv's
# defaults) is never skipped. replace=False makes the skipped indices distinct,
# which leaves exactly nlinesrandomsample of the candidate lines unskipped.
lines2skip = np.random.choice(
    np.arange(1, nlinesfile + 1),
    (nlinesfile - nlinesrandomsample),
    replace=False,
)
df = pd.read_csv(filename, skiprows=lines2skip)
import pandas as pd

# Small in-memory example: build a DataFrame from a dict of equal-length
# columns, then print a random selection of its rows.
data = {
    'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
    'Age': [10, 20, 30, 40, 50, 60],
}

df = pd.DataFrame(data)
print(df.sample(n=2))  # n is the number of random rows to draw