1. 程式人生 > 使用DataFrame和numpy生成交叉表(笛卡爾積)

使用DataFrame和numpy生成交叉表(笛卡爾積)

方法一:

import pandas as pd
from pandas import DataFrame

# Method one: cross join two single-column DataFrames with nested iteration.
w_df = DataFrame(['w1', 'w2'], columns=['worker'])
d_df = DataFrame(['0101', '0102', '0103'], columns=['date'])

# Collect every (worker, date) pair as a plain dict first and build the
# result frame once. Appending to a DataFrame inside the loop copies the
# whole frame on every iteration, and DataFrame.append no longer exists in
# pandas 2.0+.
rows = [
    {'worker': w_data, 'date': d_data}
    for w_data in w_df['worker']
    for d_data in d_df['date']
]

# Passing columns= keeps the column order fixed and still yields an empty
# frame with the right columns when either input is empty.
new_df = DataFrame(rows, columns=['worker', 'date'])
print(new_df)

# output:
#   worker  date
# 0     w1  0101
# 1     w1  0102
# 2     w1  0103
# 3     w2  0101
# 4     w2  0102
# 5     w2  0103

方法二:

# Method two: build the cross product column-wise with numpy.
import numpy as np

date_list = ['2015-01-01', '2015-01-02', '2015-01-03']
date_count = len(date_list)
workers = np.arange(15)

# Each worker id is repeated once per date, and the full date list is tiled
# once per worker, so row i pairs worker i // date_count with date
# i % date_count. The two columns are built separately rather than stacked
# into one 2-D array: stacking an integer array with a string array would
# coerce the worker ids to strings.
df = DataFrame(
    {
        'w': np.repeat(workers, date_count),
        'date': np.tile(date_list, len(workers)),
    },
    columns=['w', 'date'],
)
print(df)

方法三:

import itertools

# Method three: itertools.product yields each (worker, date) tuple lazily,
# with the rightmost input varying fastest.
for x in itertools.product(['w1', 'w2'], ['0101', '0102', '0103']):
    # print() with a single argument behaves the same on Python 2 and 3.
    print(x)

#output:
('w1', '0101')
('w1', '0102')
('w1', '0103')
('w2', '0101')
('w2', '0102')
('w2', '0103')