"""Down-sample a large click-log CSV by stratified random selection.

The input CSV is read in chunks.  Each chunk is cut into consecutive strata
of ``STRATUM_SIZE`` rows (the final stratum of a chunk may be shorter) and
one row is drawn uniformly at random from every stratum.  The selected rows
are written to a new CSV under a fixed header.
"""
import csv
import random

import pandas as pd

# NOTE(review): the input name says "dataset 2" while the output name says
# "dataset 1" -- confirm this pairing is intended.
INPUT_PATH = '训练数据集2时间处理后.csv'
OUTPUT_PATH = "训练数据集1随机挑选.csv"
CHUNK_SIZE = 10000000   # rows read from the input per pandas chunk
STRATUM_SIZE = 1000     # one row is sampled out of every STRATUM_SIZE rows
HEADER = ["ip","app","device","os","channel","click_time","is_attributed"]


def pick_stratified_indices(total, stratum_size):
    """Return one random index from each consecutive stratum of ``range(total)``.

    Strata are ``[0, stratum_size)``, ``[stratum_size, 2 * stratum_size)``,
    and so on; the last stratum is truncated at ``total``.  Each returned
    index lies strictly inside its own stratum, so no stratum is sampled
    twice and an empty input yields an empty list.

    Args:
        total: Number of rows available (non-negative).
        stratum_size: Number of rows per stratum (positive).

    Returns:
        A list of indices in increasing stratum order.

    Raises:
        ValueError: If ``stratum_size`` is not positive.
    """
    if stratum_size <= 0:
        raise ValueError("stratum_size must be positive")
    return [
        random.randrange(start, min(start + stratum_size, total))
        for start in range(0, total, stratum_size)
    ]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH,
         chunk_size=CHUNK_SIZE, stratum_size=STRATUM_SIZE):
    """Sample one row per stratum from ``input_path`` and write ``output_path``.

    Args:
        input_path: CSV file to sample from.
        output_path: CSV file to create (overwritten if it exists).
        chunk_size: Number of rows pandas reads per chunk.
        stratum_size: Number of consecutive rows that share one sample.
    """
    sampled_rows = []
    # The handle is opened with the platform default encoding and passed to
    # pandas as a file object; the ``with`` block guarantees it is closed.
    with open(input_path) as source:
        # Chunks are processed as they arrive, so only one chunk is held in
        # memory at a time.
        for chunk in pd.read_csv(source, sep=',', chunksize=chunk_size):
            values = chunk.values
            for index in pick_stratified_indices(len(values), stratum_size):
                # list(row) keeps the cells exactly as they appear in the
                # array, so the written text matches row-by-row copying.
                sampled_rows.append(list(values[index]))
    print("Iteration is stopped.")

    with open(output_path, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(HEADER)
        writer.writerows(sampled_rows)


if __name__ == "__main__":
    main()