In [51]:
import pandas as pd
import numpy as np
import haversine as hs
from datetime import datetime
import matplotlib.pyplot as plt
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
In [52]:
data = pd.read_csv('train.csv')
data
Out[52]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min)
0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24
1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33
2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26
3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21
4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45588 0x7c09 JAPRES04DEL01 30 4.8 26.902328 75.794257 26.912328 75.804257 24-03-2022 11:35:00 11:45:00 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32
45589 0xd641 AGRRES16DEL01 21 4.6 0.000000 0.000000 0.070000 0.070000 16-02-2022 19:55:00 20:10:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36
45590 0x4f8d CHENRES08DEL03 30 4.9 13.022394 80.242439 13.052394 80.272439 11-03-2022 23:50:00 00:05:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16
45591 0x5eee COIMBRES11DEL01 20 4.7 11.001753 76.986241 11.041753 77.026241 07-03-2022 13:35:00 13:40:00 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26
45592 0x5fb2 RANCHIRES09DEL02 23 4.9 23.351058 85.325731 23.431058 85.405731 02-03-2022 17:10:00 17:15:00 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36

45593 rows × 20 columns

In [53]:
data.replace({"NaN": np.nan}, regex=True, inplace = True)
data
Out[53]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min)
0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24
1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33
2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26
3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21
4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45588 0x7c09 JAPRES04DEL01 30 4.8 26.902328 75.794257 26.912328 75.804257 24-03-2022 11:35:00 11:45:00 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32
45589 0xd641 AGRRES16DEL01 21 4.6 0.000000 0.000000 0.070000 0.070000 16-02-2022 19:55:00 20:10:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36
45590 0x4f8d CHENRES08DEL03 30 4.9 13.022394 80.242439 13.052394 80.272439 11-03-2022 23:50:00 00:05:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16
45591 0x5eee COIMBRES11DEL01 20 4.7 11.001753 76.986241 11.041753 77.026241 07-03-2022 13:35:00 13:40:00 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26
45592 0x5fb2 RANCHIRES09DEL02 23 4.9 23.351058 85.325731 23.431058 85.405731 02-03-2022 17:10:00 17:15:00 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36

45593 rows × 20 columns

In [54]:
data.isna().sum()
Out[54]:
ID                                0
Delivery_person_ID                0
Delivery_person_Age            1854
Delivery_person_Ratings        1908
Restaurant_latitude               0
Restaurant_longitude              0
Delivery_location_latitude        0
Delivery_location_longitude       0
Order_Date                        0
Time_Orderd                    1731
Time_Order_picked                 0
Weatherconditions               616
Road_traffic_density            601
Vehicle_condition                 0
Type_of_order                     0
Type_of_vehicle                   0
multiple_deliveries             993
Festival                        228
City                           1200
Time_taken(min)                   0
dtype: int64
In [55]:
data['Distance'] = 0
data
Out[55]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance
0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24 0
1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33 0
2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26 0
3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 0
4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45588 0x7c09 JAPRES04DEL01 30 4.8 26.902328 75.794257 26.912328 75.804257 24-03-2022 11:35:00 11:45:00 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32 0
45589 0xd641 AGRRES16DEL01 21 4.6 0.000000 0.000000 0.070000 0.070000 16-02-2022 19:55:00 20:10:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 0
45590 0x4f8d CHENRES08DEL03 30 4.9 13.022394 80.242439 13.052394 80.272439 11-03-2022 23:50:00 00:05:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16 0
45591 0x5eee COIMBRES11DEL01 20 4.7 11.001753 76.986241 11.041753 77.026241 07-03-2022 13:35:00 13:40:00 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26 0
45592 0x5fb2 RANCHIRES09DEL02 23 4.9 23.351058 85.325731 23.431058 85.405731 02-03-2022 17:10:00 17:15:00 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36 0

45593 rows × 21 columns

haversine 是一个 Python 库,用于计算地球上两点之间的大圆距离。这个距离是指在地球表面两点之间的最短路径,考虑到地球的曲率。这个库在进行地理位置数据的处理和分析时特别有用,比如在确定两个地理坐标(经度和纬度)之间的直线距离时。

In [56]:
def calculate_distance(row):
    loc1 = (row['Restaurant_latitude'], row['Restaurant_longitude'])
    loc2 = (row['Delivery_location_latitude'], row['Delivery_location_longitude'])
    return hs.haversine(loc1, loc2)

# Apply the function to each row
data['Distance'] = data.apply(calculate_distance, axis=1)
In [57]:
data['Time_prep'] = 0
data
Out[57]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep
0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 0
1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 0
2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 0
3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 0
4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45588 0x7c09 JAPRES04DEL01 30 4.8 26.902328 75.794257 26.912328 75.804257 24-03-2022 11:35:00 11:45:00 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32 1.489848 0
45589 0xd641 AGRRES16DEL01 21 4.6 0.000000 0.000000 0.070000 0.070000 16-02-2022 19:55:00 20:10:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 11.007750 0
45590 0x4f8d CHENRES08DEL03 30 4.9 13.022394 80.242439 13.052394 80.272439 11-03-2022 23:50:00 00:05:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16 4.657202 0
45591 0x5eee COIMBRES11DEL01 20 4.7 11.001753 76.986241 11.041753 77.026241 07-03-2022 13:35:00 13:40:00 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26 6.232402 0
45592 0x5fb2 RANCHIRES09DEL02 23 4.9 23.351058 85.325731 23.431058 85.405731 02-03-2022 17:10:00 17:15:00 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36 12.074412 0

45593 rows × 22 columns

In [58]:
data.dropna(axis = 0, how = 'all', inplace = True)
data
Out[58]:
ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep
0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 0
1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 0
2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 0
3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 0
4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
45588 0x7c09 JAPRES04DEL01 30 4.8 26.902328 75.794257 26.912328 75.804257 24-03-2022 11:35:00 11:45:00 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32 1.489848 0
45589 0xd641 AGRRES16DEL01 21 4.6 0.000000 0.000000 0.070000 0.070000 16-02-2022 19:55:00 20:10:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 11.007750 0
45590 0x4f8d CHENRES08DEL03 30 4.9 13.022394 80.242439 13.052394 80.272439 11-03-2022 23:50:00 00:05:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16 4.657202 0
45591 0x5eee COIMBRES11DEL01 20 4.7 11.001753 76.986241 11.041753 77.026241 07-03-2022 13:35:00 13:40:00 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26 6.232402 0
45592 0x5fb2 RANCHIRES09DEL02 23 4.9 23.351058 85.325731 23.431058 85.405731 02-03-2022 17:10:00 17:15:00 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36 12.074412 0

45593 rows × 22 columns

In [59]:
data = data[data["Time_Orderd"].str.contains("NaN") == False].reset_index()
data = data[data["Time_Order_picked"].str.contains("NaN") == False].reset_index() 
data.head(50)
Out[59]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep
0 0 0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 0
1 1 1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 0
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 0
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 0
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 0
5 5 5 0x9bb4 HYDRES09DEL03 22 4.8 17.431668 78.408321 17.461668 78.438321 11-03-2022 21:20:00 21:30:00 conditions Cloudy Jam 0 Buffet motorcycle 1 No Urban (min) 26 4.610372 0
6 6 6 0x95b4 RANCHIRES15DEL01 33 4.7 23.369746 85.339820 23.479746 85.449820 04-03-2022 19:15:00 19:30:00 conditions Fog Jam 1 Meal scooter 1 No Metropolitian (min) 40 16.600384 0
7 7 7 0x9eb2 MYSRES15DEL02 35 4.6 12.352058 76.606650 12.482058 76.736650 14-03-2022 17:25:00 17:30:00 conditions Cloudy Medium 2 Meal motorcycle 1 No Metropolitian (min) 32 20.205281 0
8 8 8 0x1102 HYDRES05DEL02 22 4.8 17.433809 78.386744 17.563809 78.516744 20-03-2022 20:55:00 21:05:00 conditions Stormy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 34 19.975548 0
9 9 9 0xcdcd DEHRES17DEL01 36 4.2 30.327968 78.046106 30.397968 78.116106 12-02-2022 21:55:00 22:10:00 conditions Fog Jam 2 Snack motorcycle 3 No Metropolitian (min) 46 10.280596 0
10 10 10 0xd987 KOCRES16DEL01 21 4.7 10.003064 76.307589 10.043064 76.347589 13-02-2022 14:55:00 15:05:00 conditions Stormy High 1 Meal motorcycle 1 No Metropolitian (min) 23 6.242327 0
11 11 11 0x2784 PUNERES13DEL03 23 4.7 18.562450 73.916619 18.652450 74.006619 04-03-2022 17:30:00 17:40:00 conditions Sandstorms Medium 1 Drinks scooter 1 No Metropolitian (min) 21 13.787879 0
12 12 12 0xc8b6 LUDHRES15DEL02 34 4.3 30.899584 75.809346 30.919584 75.829346 13-02-2022 09:20:00 09:30:00 conditions Sandstorms Low 0 Buffet motorcycle 0 No Metropolitian (min) 20 2.930262 0
13 13 13 0xdb64 KNPRES14DEL02 24 4.7 26.463504 80.372929 26.593504 80.502929 14-02-2022 19:50:00 20:05:00 conditions Fog Jam 1 Snack scooter 1 No Metropolitian (min) 41 19.396645 0
14 14 14 0x3af3 MUMRES15DEL03 29 4.5 19.176269 72.836721 19.266269 72.926721 02-04-2022 20:25:00 20:35:00 conditions Sandstorms Jam 2 Buffet electric_scooter 1 No Metropolitian (min) 20 13.763996 0
15 15 15 0x3aab MYSRES01DEL01 35 4 12.311072 76.654878 12.351072 76.694878 01-03-2022 14:55:00 15:10:00 conditions Windy High 1 Meal scooter 1 No Metropolitian (min) 33 6.218010 0
16 16 16 0x689b PUNERES20DEL01 33 4.2 18.592718 73.773572 18.702718 73.883572 16-03-2022 20:30:00 20:40:00 conditions Sandstorms Jam 2 Snack motorcycle 1 No Metropolitian (min) 40 16.849963 0
17 17 17 0x6f67 HYDRES14DEL01 34 4.9 17.426228 78.407495 17.496228 78.477495 20-03-2022 20:40:00 20:50:00 conditions Cloudy Jam 0 Snack motorcycle NaN No Metropolitian (min) 41 10.757124 0
18 18 18 0xc9cf KOLRES15DEL03 21 4.7 22.552672 88.352885 22.582672 88.382885 15-02-2022 21:15:00 21:30:00 conditions Windy Jam 0 Meal motorcycle 1 No Urban (min) 15 4.540581 0
19 19 19 0x36b8 PUNERES19DEL02 25 4.1 18.563934 73.915367 18.643935 73.995367 16-03-2022 20:20:00 20:25:00 conditions Sandstorms Jam 0 Snack motorcycle 2 No Metropolitian (min) 36 12.256093 0
20 20 20 0x5795 RANCHIRES06DEL02 31 4.7 23.357804 85.325146 23.487804 85.455146 10-03-2022 22:30:00 22:45:00 conditions Sandstorms Low 2 Meal electric_scooter 0 No Metropolitian (min) 26 19.618766 0
21 21 21 0x6c6b COIMBRES13DEL01 37 5 11.003669 76.976494 11.013669 76.986494 11-03-2022 08:15:00 08:30:00 conditions Sandstorms Low 1 Snack motorcycle 1 No Metropolitian (min) 20 1.558134 0
22 22 22 0xb816 CHENRES19DEL02 33 4.3 12.986047 80.218114 13.116047 80.348114 27-03-2022 19:30:00 19:45:00 conditions Windy Jam 2 Meal scooter 1 No Metropolitian (min) 39 20.180663 0
23 23 23 0x539b MUMRES02DEL01 25 4 19.221315 72.862381 19.261315 72.902381 26-03-2022 12:25:00 12:30:00 conditions Cloudy High 1 Buffet motorcycle 1 No Metropolitian (min) 34 6.116980 0
24 24 24 0xa1b2 CHENRES01DEL01 29 4.5 13.005801 80.250744 13.115801 80.360744 27-03-2022 18:35:00 18:50:00 conditions Sunny Medium 2 Meal electric_scooter 1 No Metropolitian (min) 15 17.075618 0
25 25 25 0x3231 JAPRES16DEL03 27 5 26.849596 75.800512 26.879596 75.830512 05-04-2022 20:35:00 20:40:00 conditions Stormy Jam 0 Snack motorcycle 0 No Urban (min) 18 4.470293 0
26 26 26 0x8bc0 SURRES15DEL03 35 4.3 21.160522 72.771477 21.250522 72.861477 12-03-2022 23:20:00 23:30:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 38 13.682063 0
27 27 27 0x2288 BANGRES09DEL03 32 4 12.934179 77.615797 13.024179 77.705797 16-03-2022 21:20:00 21:35:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 47 13.973202 0
28 28 28 0x3c5e PUNERES04DEL01 23 4.8 18.514210 73.838429 18.624210 73.948429 02-04-2022 23:35:00 23:45:00 conditions Windy Low 2 Buffet electric_scooter 0 No Urban (min) 12 16.853642 0
29 29 29 0x3e60 COIMBRES02DEL03 31 4.8 11.022477 76.995667 11.052477 77.025667 01-04-2022 22:35:00 22:50:00 conditions Sandstorms Low 2 Drinks motorcycle 1 No Metropolitian (min) 26 4.674179 0
30 30 30 0xbff SURRES16DEL02 36 4.1 21.160437 72.774209 21.210437 72.824209 05-03-2022 22:35:00 22:40:00 conditions Stormy Low 0 Drinks motorcycle 1 No Urban (min) 22 7.601627 0
31 31 31 0xd936 GOARES15DEL02 26 4.3 15.513150 73.783460 15.563150 73.833460 11-02-2022 23:25:00 23:35:00 conditions Sandstorms Low 0 Buffet motorcycle 0 No Urban (min) 21 7.720335 0
32 32 32 0xd681 GOARES07DEL01 38 4.9 15.561295 73.749478 15.601295 73.789478 11-02-2022 13:35:00 13:40:00 conditions Cloudy High 1 Drinks scooter 1 No Urban (min) 25 6.175645 0
33 33 33 0x2876 RANCHIRES02DEL01 32 3.5 0.000000 0.000000 0.110000 0.110000 08-03-2022 21:35:00 21:45:00 conditions Stormy Jam 1 Snack scooter 0 No Urban (min) 35 17.297890 0
34 34 34 0x30c8 PUNERES19DEL02 32 4.6 18.563934 73.915367 18.693935 74.045367 02-04-2022 22:35:00 22:45:00 conditions Cloudy Low 2 Drinks scooter 1 No Metropolitian (min) 30 19.914714 0
35 35 35 0xb843 PUNERES02DEL01 33 4.9 18.551440 73.804855 18.621440 73.874855 08-03-2022 18:55:00 19:10:00 conditions Sunny Medium 1 Snack motorcycle 1 No Metropolitian (min) 22 10.724533 0
36 36 36 0xb3a0 PUNERES18DEL01 20 4.7 18.593481 73.785901 18.633481 73.825901 03-04-2022 14:15:00 14:25:00 conditions Windy High 1 Snack scooter 0 No Urban (min) 10 6.127844 0
37 37 37 0x6531 SURRES08DEL01 20 4.8 21.173343 72.792731 21.183343 72.802731 30-03-2022 11:00:00 11:10:00 conditions Sandstorms Low 2 Meal scooter 1 No Metropolitian (min) 19 1.520359 0
38 38 38 0x4bda HYDRES17DEL02 35 5 17.451976 78.385883 17.471976 78.405883 01-04-2022 09:45:00 09:55:00 conditions Sunny Low 2 Snack scooter 1 No Urban (min) 11 3.073458 0
39 39 39 0x9d26 BANGRES17DEL02 26 4.9 12.972532 77.608179 12.992532 77.628179 28-03-2022 08:40:00 08:55:00 conditions Stormy Low 2 Buffet scooter 0 No Metropolitian (min) 11 3.105136 0
40 40 40 0x9b18 BANGRES17DEL01 22 4.8 12.972532 77.608179 13.042532 77.678179 18-03-2022 23:00:00 23:10:00 conditions Fog Low 1 Snack motorcycle 1 No Metropolitian (min) 28 10.867442 0
41 41 41 0x5d99 CHENRES11DEL01 35 4.3 13.064181 80.236442 13.134181 80.306442 14-03-2022 17:25:00 17:30:00 conditions Cloudy Medium 1 Snack motorcycle 1 No Metropolitian (min) 33 10.865480 0
42 42 43 0xb796 SURRES17DEL03 37 4.7 21.149569 72.772697 21.239569 72.862697 04-04-2022 19:45:00 19:50:00 conditions Sandstorms Jam 0 Snack motorcycle 3 No Metropolitian (min) 52 13.682535 0
43 43 44 0x85b4 MUMRES07DEL01 28 4.6 19.091458 72.827808 19.201458 72.937808 08-03-2022 19:10:00 19:25:00 conditions Stormy Jam 1 Snack scooter 1 No Metropolitian (min) 22 16.826265 0
44 44 45 0xc644 KOLRES16DEL02 37 4.9 22.539129 88.365507 22.559129 88.385507 13-02-2022 10:55:00 11:00:00 conditions Fog Low 1 Snack scooter 1 No Urban (min) 16 3.027241 0
45 45 46 0x6999 MUMRES02DEL03 23 4.9 19.221315 72.862381 19.281315 72.922381 24-03-2022 21:40:00 21:45:00 conditions Sunny Jam 2 Drinks electric_scooter 0 No Metropolitian (min) 11 9.175207 0
46 46 47 0x63b6 BANGRES05DEL03 27 4.9 12.970324 77.645748 13.030324 77.705748 19-03-2022 19:00:00 19:15:00 conditions Windy Medium 1 Drinks scooter 1 No Metropolitian (min) 25 9.315082 0
47 47 48 0xa30b CHENRES11DEL01 37 4.8 13.064181 80.236442 13.104181 80.276442 09-03-2022 16:45:00 16:55:00 conditions Cloudy Medium 0 Drinks motorcycle 1 No Urban (min) 26 6.209030 0
48 48 49 0x3556 SURRES09DEL01 33 5 21.175975 72.795503 21.185975 72.805503 19-03-2022 11:30:00 11:40:00 conditions Sunny High 2 Snack scooter 1 No Metropolitian (min) 18 1.520347 0
49 49 50 0x5554 RANCHIRES15DEL01 31 4.9 23.369746 85.339820 23.409746 85.379820 03-04-2022 15:10:00 15:15:00 conditions Fog Medium 1 Snack motorcycle 1 No Metropolitian (min) 16 6.037234 0
In [60]:
# 计算配送时间
for i in range(len(data)):
    t1 = datetime.strptime(data['Time_Orderd'][i], "%H:%M:%S")
    t2  =datetime.strptime(data['Time_Order_picked'][i], "%H:%M:%S")
 
    diff = t2-t1
    sec = diff.total_seconds()
    minute = sec/60
    data['Time_prep'][i] = minute

data
Out[60]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Restaurant_latitude Restaurant_longitude Delivery_location_latitude Delivery_location_longitude Order_Date Time_Orderd Time_Order_picked Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep
0 0 0 0x4607 INDORES13DEL02 37 4.9 22.745049 75.892471 22.765049 75.912471 19-03-2022 11:30:00 11:45:00 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 15
1 1 1 0xb379 BANGRES18DEL02 34 4.5 12.913041 77.683237 13.043041 77.813237 25-03-2022 19:45:00 19:50:00 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 5
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 12.914264 77.678400 12.924264 77.688400 19-03-2022 08:30:00 08:45:00 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 15
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 11.003669 76.976494 11.053669 77.026494 05-04-2022 18:00:00 18:10:00 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 10
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 12.972793 80.249982 13.012793 80.289982 26-03-2022 13:30:00 13:45:00 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 15
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 26.902328 75.794257 26.912328 75.804257 24-03-2022 11:35:00 11:45:00 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32 1.489848 10
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 0.000000 0.000000 0.070000 0.070000 16-02-2022 19:55:00 20:10:00 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 11.007750 15
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 13.022394 80.242439 13.052394 80.272439 11-03-2022 23:50:00 00:05:00 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16 4.657202 -1425
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 11.001753 76.986241 11.041753 77.026241 07-03-2022 13:35:00 13:40:00 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26 6.232402 5
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 23.351058 85.325731 23.431058 85.405731 02-03-2022 17:10:00 17:15:00 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36 12.074412 5

43862 rows × 24 columns

In [61]:
data = data.drop(['Restaurant_latitude', 'Restaurant_longitude', 'Delivery_location_latitude', 'Delivery_location_longitude'], axis=1)
data = data.drop(['Time_Orderd', 'Time_Order_picked'], axis=1)
data
Out[61]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Order_Date Weatherconditions Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep
0 0 0 0x4607 INDORES13DEL02 37 4.9 19-03-2022 conditions Sunny High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 15
1 1 1 0xb379 BANGRES18DEL02 34 4.5 25-03-2022 conditions Stormy Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 5
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 19-03-2022 conditions Sandstorms Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 15
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 05-04-2022 conditions Sunny Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 10
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 26-03-2022 conditions Cloudy High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 15
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 24-03-2022 conditions Windy High 1 Meal motorcycle 0 No Metropolitian (min) 32 1.489848 10
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 16-02-2022 conditions Windy Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 11.007750 15
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 11-03-2022 conditions Cloudy Low 1 Drinks scooter 0 No Metropolitian (min) 16 4.657202 -1425
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 07-03-2022 conditions Cloudy High 0 Snack motorcycle 1 No Metropolitian (min) 26 6.232402 5
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 02-03-2022 conditions Fog Medium 2 Snack scooter 1 No Metropolitian (min) 36 12.074412 5

43862 rows × 18 columns

In [62]:
# 将一个名为data的数据框(DataFrame)中的Weatherconditions列的值进行处理,然后将处理后的结果存储到一个名为Weather的新列中,最后删除原始的Weatherconditions列。
data['Weather'] = 0
for i in range(len(data)):
    str1 = data['Weatherconditions'][i]
    cond = str1.split(' ')
    data['Weather'][i] = cond[1]

data = data.drop(['Weatherconditions'], axis=1)
In [63]:
data = data.drop(['Order_Date'], axis=1)
data
Out[63]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep Weather
0 0 0 0x4607 INDORES13DEL02 37 4.9 High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 15 Sunny
1 1 1 0xb379 BANGRES18DEL02 34 4.5 Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 5 Stormy
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 15 Sandstorms
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 10 Sunny
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 15 Cloudy
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 High 1 Meal motorcycle 0 No Metropolitian (min) 32 1.489848 10 Windy
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 11.007750 15 Windy
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 Low 1 Drinks scooter 0 No Metropolitian (min) 16 4.657202 -1425 Cloudy
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 High 0 Snack motorcycle 1 No Metropolitian (min) 26 6.232402 5 Cloudy
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 Medium 2 Snack scooter 1 No Metropolitian (min) 36 12.074412 5 Fog

43862 rows × 17 columns

In [64]:
data['Time_taken'] = 0
for i in range(len(data)):
    list2 = data['Time_taken(min)'][i]
    str2 = list2.split(' ')
    data['Time_taken'][i] = int(str2[1])
data
Out[64]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Time_taken(min) Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 High 2 Snack motorcycle 0 No Urban (min) 24 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 Jam 2 Snack scooter 1 No Metropolitian (min) 33 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 Low 0 Drinks motorcycle 1 No Urban (min) 26 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 Medium 0 Buffet motorcycle 1 No Metropolitian (min) 21 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 High 1 Snack scooter 1 No Metropolitian (min) 30 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 High 1 Meal motorcycle 0 No Metropolitian (min) 32 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 Jam 0 Buffet motorcycle 1 No Metropolitian (min) 36 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 Low 1 Drinks scooter 0 No Metropolitian (min) 16 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 High 0 Snack motorcycle 1 No Metropolitian (min) 26 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 Medium 2 Snack scooter 1 No Metropolitian (min) 36 12.074412 5 Fog 36

43862 rows × 18 columns

In [65]:
data = data.drop(['Time_taken(min)'], axis=1)
data
Out[65]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 High 2 Snack motorcycle 0 No Urban 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 Jam 2 Snack scooter 1 No Metropolitian 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 Low 0 Drinks motorcycle 1 No Urban 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 Medium 0 Buffet motorcycle 1 No Metropolitian 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 High 1 Snack scooter 1 No Metropolitian 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 High 1 Meal motorcycle 0 No Metropolitian 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 Jam 0 Buffet motorcycle 1 No Metropolitian 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 Low 1 Drinks scooter 0 No Metropolitian 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 High 0 Snack motorcycle 1 No Metropolitian 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 Medium 2 Snack scooter 1 No Metropolitian 12.074412 5 Fog 36

43862 rows × 17 columns

In [67]:
order_type = data['Type_of_order'].unique()
for i in range(len(order_type)):
    data['Type_of_order'] = data['Type_of_order'].replace(order_type[i], i)
data
Out[67]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 High 2 0 motorcycle 0 No Urban 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 Jam 2 0 scooter 1 No Metropolitian 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 Low 0 1 motorcycle 1 No Urban 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 Medium 0 2 motorcycle 1 No Metropolitian 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 High 1 0 scooter 1 No Metropolitian 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 High 1 3 motorcycle 0 No Metropolitian 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 Jam 0 2 motorcycle 1 No Metropolitian 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 Low 1 1 scooter 0 No Metropolitian 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 High 0 0 motorcycle 1 No Metropolitian 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 Medium 2 0 scooter 1 No Metropolitian 12.074412 5 Fog 36

43862 rows × 17 columns

In [70]:
data['Type_of_vehicle'] = data['Type_of_vehicle'].replace('motorcycle ', 0)
data['Type_of_vehicle'] = data['Type_of_vehicle'].replace('scooter ', 1)
data['Type_of_vehicle'] = data['Type_of_vehicle'].replace('electric_scooter ', 2)
data
Out[70]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 High 2 0 0 0 No Urban 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 Jam 2 0 1 1 No Metropolitian 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 Low 0 1 0 1 No Urban 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 Medium 0 2 0 1 No Metropolitian 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 High 1 0 1 1 No Metropolitian 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 High 1 3 0 0 No Metropolitian 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 Jam 0 2 0 1 No Metropolitian 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 Low 1 1 1 0 No Metropolitian 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 High 0 0 0 1 No Metropolitian 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 Medium 2 0 1 1 No Metropolitian 12.074412 5 Fog 36

43862 rows × 17 columns

In [71]:
road_traffic = data['Road_traffic_density'].unique()
for i in range(len(road_traffic)):
    data['Road_traffic_density'] = data['Road_traffic_density'].replace(road_traffic[i], i)
data
Out[71]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 0 2 0 0 0 No Urban 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 1 2 0 1 1 No Metropolitian 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 2 0 1 0 1 No Urban 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 3 0 2 0 1 No Metropolitian 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 0 1 0 1 1 No Metropolitian 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 0 1 3 0 0 No Metropolitian 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 1 0 2 0 1 No Metropolitian 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 2 1 1 1 0 No Metropolitian 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 0 0 0 0 1 No Metropolitian 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 3 2 0 1 1 No Metropolitian 12.074412 5 Fog 36

43862 rows × 17 columns

In [72]:
festival = data['Festival'].unique()
for i in range(len(festival)):
    data['Festival'] = data['Festival'].replace(festival[i], i)
data
Out[72]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 0 2 0 0 0 0.0 Urban 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 1 2 0 1 1 0.0 Metropolitian 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 2 0 1 0 1 0.0 Urban 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 3 0 2 0 1 0.0 Metropolitian 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 0 1 0 1 1 0.0 Metropolitian 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 0 1 3 0 0 0.0 Metropolitian 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 1 0 2 0 1 0.0 Metropolitian 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 2 1 1 1 0 0.0 Metropolitian 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 0 0 0 0 1 0.0 Metropolitian 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 3 2 0 1 1 0.0 Metropolitian 12.074412 5 Fog 36

43862 rows × 17 columns

In [73]:
city = data['City'].unique()
for i in range(len(city)):
    data['City'] = data['City'].replace(city[i], i)
data
Out[73]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 0 2 0 0 0 0.0 0.0 3.025153 15 Sunny 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 1 2 0 1 1 0.0 1.0 20.183558 5 Stormy 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 2 0 1 0 1 0.0 0.0 1.552760 15 Sandstorms 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 3 0 2 0 1 0.0 1.0 7.790412 10 Sunny 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 0 1 0 1 1 0.0 1.0 6.210147 15 Cloudy 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 0 1 3 0 0 0.0 1.0 1.489848 10 Windy 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 1 0 2 0 1 0.0 1.0 11.007750 15 Windy 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 2 1 1 1 0 0.0 1.0 4.657202 -1425 Cloudy 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 0 0 0 0 1 0.0 1.0 6.232402 5 Cloudy 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 3 2 0 1 1 0.0 1.0 12.074412 5 Fog 36

43862 rows × 17 columns

In [74]:
weather = data['Weather'].unique()
for i in range(len(weather)):
    data['Weather'] = data['Weather'].replace(weather[i], i)
data
Out[74]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 0 2 0 0 0 0.0 0.0 3.025153 15 0 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 1 2 0 1 1 0.0 1.0 20.183558 5 1 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 2 0 1 0 1 0.0 0.0 1.552760 15 2 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 3 0 2 0 1 0.0 1.0 7.790412 10 0 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 0 1 0 1 1 0.0 1.0 6.210147 15 3 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 0 1 3 0 0 0.0 1.0 1.489848 10 5 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 1 0 2 0 1 0.0 1.0 11.007750 15 5 36
43859 43859 45590 0x4f8d CHENRES08DEL03 30 4.9 2 1 1 1 0 0.0 1.0 4.657202 -1425 3 16
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 0 0 0 0 1 0.0 1.0 6.232402 5 3 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 3 2 0 1 1 0.0 1.0 12.074412 5 4 36

43862 rows × 17 columns

In [75]:
data = data[data['Time_prep']>0]
data = data[data['Delivery_person_Age']!="NaN "]
data = data[data['Delivery_person_Ratings']!="NaN "]
data = data[data['Road_traffic_density']!="NaN "]
data = data[data['Vehicle_condition']!="NaN"]
data = data[data['Type_of_order']!="NaN"]
data = data[data['Type_of_vehicle']!="NaN"]
data = data[data['multiple_deliveries']!="NaN"]
data = data[data['Festival']!="NaN"]
data = data[data['City']!="NaN"]
data = data[data['Distance']!="NaN"]
data = data[data['Time_prep']!="NaN"]
data = data[data['Weather']!="NaN"]
data
Out[75]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37 4.9 0 2 0 0 0 0.0 0.0 3.025153 15 0 24
1 1 1 0xb379 BANGRES18DEL02 34 4.5 1 2 0 1 1 0.0 1.0 20.183558 5 1 33
2 2 2 0x5d6d BANGRES19DEL01 23 4.4 2 0 1 0 1 0.0 0.0 1.552760 15 2 26
3 3 3 0x7a6a COIMBRES13DEL02 38 4.7 3 0 2 0 1 0.0 1.0 7.790412 10 0 21
4 4 4 0x70a2 CHENRES12DEL01 32 4.6 0 1 0 1 1 0.0 1.0 6.210147 15 3 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43856 43856 45587 0x1178 RANCHIRES16DEL01 35 4.2 1 2 1 0 1 0.0 1.0 16.600295 10 5 33
43857 43857 45588 0x7c09 JAPRES04DEL01 30 4.8 0 1 3 0 0 0.0 1.0 1.489848 10 5 32
43858 43858 45589 0xd641 AGRRES16DEL01 21 4.6 1 0 2 0 1 0.0 1.0 11.007750 15 5 36
43860 43860 45591 0x5eee COIMBRES11DEL01 20 4.7 0 0 0 0 1 0.0 1.0 6.232402 5 3 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23 4.9 3 2 0 1 1 0.0 1.0 12.074412 5 4 36

43031 rows × 17 columns

In [76]:
data['Delivery_person_Age'] = pd.to_numeric(data['Delivery_person_Age'])
data['Delivery_person_Ratings'] = pd.to_numeric(data['Delivery_person_Ratings'])
data['multiple_deliveries'] = pd.to_numeric(data['multiple_deliveries'])
data
Out[76]:
level_0 index ID Delivery_person_ID Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 0 0x4607 INDORES13DEL02 37.0 4.9 0 2 0 0 0.0 0.0 0.0 3.025153 15 0 24
1 1 1 0xb379 BANGRES18DEL02 34.0 4.5 1 2 0 1 1.0 0.0 1.0 20.183558 5 1 33
2 2 2 0x5d6d BANGRES19DEL01 23.0 4.4 2 0 1 0 1.0 0.0 0.0 1.552760 15 2 26
3 3 3 0x7a6a COIMBRES13DEL02 38.0 4.7 3 0 2 0 1.0 0.0 1.0 7.790412 10 0 21
4 4 4 0x70a2 CHENRES12DEL01 32.0 4.6 0 1 0 1 1.0 0.0 1.0 6.210147 15 3 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
43856 43856 45587 0x1178 RANCHIRES16DEL01 35.0 4.2 1 2 1 0 1.0 0.0 1.0 16.600295 10 5 33
43857 43857 45588 0x7c09 JAPRES04DEL01 30.0 4.8 0 1 3 0 0.0 0.0 1.0 1.489848 10 5 32
43858 43858 45589 0xd641 AGRRES16DEL01 21.0 4.6 1 0 2 0 1.0 0.0 1.0 11.007750 15 5 36
43860 43860 45591 0x5eee COIMBRES11DEL01 20.0 4.7 0 0 0 0 1.0 0.0 1.0 6.232402 5 3 26
43861 43861 45592 0x5fb2 RANCHIRES09DEL02 23.0 4.9 3 2 0 1 1.0 0.0 1.0 12.074412 5 4 36

43031 rows × 17 columns

In [77]:
data = data.drop(['Delivery_person_ID', 'ID', 'index', 'level_0'], axis=1)
data
Out[77]:
Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 37.0 4.9 0 2 0 0 0.0 0.0 0.0 3.025153 15 0 24
1 34.0 4.5 1 2 0 1 1.0 0.0 1.0 20.183558 5 1 33
2 23.0 4.4 2 0 1 0 1.0 0.0 0.0 1.552760 15 2 26
3 38.0 4.7 3 0 2 0 1.0 0.0 1.0 7.790412 10 0 21
4 32.0 4.6 0 1 0 1 1.0 0.0 1.0 6.210147 15 3 30
... ... ... ... ... ... ... ... ... ... ... ... ... ...
43856 35.0 4.2 1 2 1 0 1.0 0.0 1.0 16.600295 10 5 33
43857 30.0 4.8 0 1 3 0 0.0 0.0 1.0 1.489848 10 5 32
43858 21.0 4.6 1 0 2 0 1.0 0.0 1.0 11.007750 15 5 36
43860 20.0 4.7 0 0 0 0 1.0 0.0 1.0 6.232402 5 3 26
43861 23.0 4.9 3 2 0 1 1.0 0.0 1.0 12.074412 5 4 36

43031 rows × 13 columns

In [78]:
data['Distance'].describe()
data = data[data['Distance']<100]
data = data.reset_index()
data
Out[78]:
index Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather Time_taken
0 0 37.0 4.9 0 2 0 0 0.0 0.0 0.0 3.025153 15 0 24
1 1 34.0 4.5 1 2 0 1 1.0 0.0 1.0 20.183558 5 1 33
2 2 23.0 4.4 2 0 1 0 1.0 0.0 0.0 1.552760 15 2 26
3 3 38.0 4.7 3 0 2 0 1.0 0.0 1.0 7.790412 10 0 21
4 4 32.0 4.6 0 1 0 1 1.0 0.0 1.0 6.210147 15 3 30
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
42872 43856 35.0 4.2 1 2 1 0 1.0 0.0 1.0 16.600295 10 5 33
42873 43857 30.0 4.8 0 1 3 0 0.0 0.0 1.0 1.489848 10 5 32
42874 43858 21.0 4.6 1 0 2 0 1.0 0.0 1.0 11.007750 15 5 36
42875 43860 20.0 4.7 0 0 0 0 1.0 0.0 1.0 6.232402 5 3 26
42876 43861 23.0 4.9 3 2 0 1 1.0 0.0 1.0 12.074412 5 4 36

42877 rows × 14 columns

In [79]:
data.isna().sum()
Out[79]:
index                        0
Delivery_person_Age        211
Delivery_person_Ratings    257
Road_traffic_density         0
Vehicle_condition            0
Type_of_order                0
Type_of_vehicle              0
multiple_deliveries        927
Festival                     0
City                         0
Distance                     0
Time_prep                    0
Weather                      0
Time_taken                   0
dtype: int64
In [80]:
data['multiple_deliveries'].head(30)
Out[80]:
0     0.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     3.0
10    1.0
11    1.0
12    0.0
13    1.0
14    1.0
15    1.0
16    1.0
17    NaN
18    1.0
19    2.0
20    0.0
21    1.0
22    1.0
23    1.0
24    1.0
25    0.0
26    0.0
27    1.0
28    0.0
29    1.0
Name: multiple_deliveries, dtype: float64
In [82]:
data['Delivery_person_Age'] = data['Delivery_person_Age'].fillna(data['Delivery_person_Age'].mean()) 
data['Delivery_person_Ratings'] = data['Delivery_person_Ratings'].fillna(data['Delivery_person_Ratings'].mean()) 
data['multiple_deliveries'].unique()
data['multiple_deliveries'].median()
data['multiple_deliveries'] = data['multiple_deliveries'].fillna(data['multiple_deliveries'].median())
In [83]:
data.isna().sum()
Out[83]:
index                      0
Delivery_person_Age        0
Delivery_person_Ratings    0
Road_traffic_density       0
Vehicle_condition          0
Type_of_order              0
Type_of_vehicle            0
multiple_deliveries        0
Festival                   0
City                       0
Distance                   0
Time_prep                  0
Weather                    0
Time_taken                 0
dtype: int64
In [84]:
import seaborn as sns
In [85]:
plt.figure(figsize=(15,7))
ax = sns.heatmap(data.corr(), annot=True)
In [86]:
columns=data[['Delivery_person_Age','Delivery_person_Ratings','Road_traffic_density','Vehicle_condition','Type_of_order','Time_taken']]
sns.pairplot(columns)
plt.show()
In [ ]:
 
In [87]:
from sklearn.model_selection import train_test_split
In [88]:
x = data.drop(['Time_taken', 'index'], axis=1)
x
Out[88]:
Delivery_person_Age Delivery_person_Ratings Road_traffic_density Vehicle_condition Type_of_order Type_of_vehicle multiple_deliveries Festival City Distance Time_prep Weather
0 37.0 4.9 0 2 0 0 0.0 0.0 0.0 3.025153 15 0
1 34.0 4.5 1 2 0 1 1.0 0.0 1.0 20.183558 5 1
2 23.0 4.4 2 0 1 0 1.0 0.0 0.0 1.552760 15 2
3 38.0 4.7 3 0 2 0 1.0 0.0 1.0 7.790412 10 0
4 32.0 4.6 0 1 0 1 1.0 0.0 1.0 6.210147 15 3
... ... ... ... ... ... ... ... ... ... ... ... ...
42872 35.0 4.2 1 2 1 0 1.0 0.0 1.0 16.600295 10 5
42873 30.0 4.8 0 1 3 0 0.0 0.0 1.0 1.489848 10 5
42874 21.0 4.6 1 0 2 0 1.0 0.0 1.0 11.007750 15 5
42875 20.0 4.7 0 0 0 0 1.0 0.0 1.0 6.232402 5 3
42876 23.0 4.9 3 2 0 1 1.0 0.0 1.0 12.074412 5 4

42877 rows × 12 columns

In [89]:
y = data[['Time_taken']]
y
Out[89]:
Time_taken
0 24
1 33
2 26
3 21
4 30
... ...
42872 33
42873 32
42874 36
42875 26
42876 36

42877 rows × 1 columns

In [90]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
In [91]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
import warnings
In [92]:
warnings.filterwarnings('ignore')

随机森林回归¶

In [93]:
forest = RandomForestRegressor(n_estimators= 200, min_samples_split= 4, min_samples_leaf= 1)
forest.fit(x_train, y_train)
Out[93]:
RandomForestRegressor(min_samples_split=4, n_estimators=200)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestRegressor(min_samples_split=4, n_estimators=200)
In [94]:
forest.score(x_test, y_test)
Out[94]:
0.8366806837953505

线性回归¶

In [95]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(x_train, y_train)
reg.score(x_test, y_test)
Out[95]:
0.46966387660478515
In [96]:
from sklearn import linear_model
reg2=linear_model.LassoLars(alpha=.1, normalize=False)
reg2.fit(x_train,y_train)
Out[96]:
LassoLars(alpha=0.1, normalize=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LassoLars(alpha=0.1, normalize=False)
In [97]:
reg2.score(x_train, y_train)
Out[97]:
0.46740068119629696