使用DataFrame和numpy生成交叉表(笛卡爾積)
阿新 • • 發佈:2019-02-09
方法一:
import pandas as pd
from pandas import DataFrame

# Two single-column frames whose rows are to be paired exhaustively.
w_df = DataFrame(['w1', 'w2'], columns=['worker'])
d_df = DataFrame(['0101', '0102', '0103'], columns=['date'])

# Collect every (worker, date) pair first and build the result with a single
# DataFrame call. Growing a frame row by row with DataFrame.append copies the
# whole frame on each iteration, and append() was removed in pandas 2.0.
rows = [
    {'date': d_data, 'worker': w_data}
    for w_data in w_df['worker']    # outer loop: worker varies slowest
    for d_data in d_df['date']      # inner loop: date varies fastest
]

# Passing columns explicitly keeps the frame well-formed even when one of the
# inputs is empty; the order matches the printed output shown below.
new_df = DataFrame(rows, columns=['date', 'worker'])

# print(...) with a single argument behaves the same on Python 2 and 3.
print(new_df)
#output
date worker
0 0101 w1
1 0102 w1
2 0103 w1
3 0101 w2
4 0102 w2
5 0103 w2
方法二:
import numpy as np
from pandas import DataFrame

date_list = ['2015-01-01', '2015-01-02', '2015-01-03']
date_count = len(date_list)
workers = np.arange(15)

# Cross product without a Python-level loop:
#   np.repeat -> each worker id repeated once per date  (0,0,0,1,1,1,...)
#   np.tile   -> the whole date list repeated per worker (d1,d2,d3,d1,...)
# Building the columns separately keeps the worker ids as integers; stacking
# them with the date strings via np.hstack would coerce them to strings.
df = DataFrame(
    {
        'w': np.repeat(workers, date_count),
        'date': np.tile(date_list, len(workers)),
    },
    columns=['w', 'date'],
)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(df)
方法三:
import itertools

worker_ids = ['w1', 'w2']
date_codes = ['0101', '0102', '0103']

# itertools.product yields the Cartesian product lazily as tuples, with the
# last argument varying fastest, so nothing is materialised up front.
for pair in itertools.product(worker_ids, date_codes):
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(pair)
#output:
('w1', '0101')
('w1', '0102')
('w1', '0103')
('w2', '0101')
('w2', '0102')
('w2', '0103')