-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfeatureExtra.py
More file actions
100 lines (81 loc) · 4.19 KB
/
featureExtra.py
File metadata and controls
100 lines (81 loc) · 4.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pickle
import numpy as np
import pandas as pd
#from artificiaMapGenerate import GRIDSIZE_ROW, GRIDSIZE_COL
#from trajGenerate import GridMap
"""
This file extracts features from the generated data, using an approach similar to the RF method from
'A Hybrid Method for Intercity Transport Mode Identification Based on Mobility Features and Sequential Relations Mined from Cellular Signaling Data'.
"""
# prepare the test data
# with route name to get ['mode']
"""raw_df = pd.read_csv('data/artificial_traj.csv')
with open('data/artificial_network.pkl', 'rb') as f:
artificial_net = pickle.load(f)
artificial_map = GridMap(GRIDSIZE_ROW, GRIDSIZE_COL, artificial_net)"""
# without route name,in real-world data
# Load the mixed trajectory records (the variant without route names).
raw_df = pd.read_csv('data/artificial_traj_mixed.csv')
# Per-trajectory (grouped by 'ID') difference between consecutive records:
#   duration <- time, dx <- locx, dy <- locy.
# The first record of each trajectory has no predecessor, so its NaN becomes 0.
for _feature, _source in (('duration', 'time'), ('dx', 'locx'), ('dy', 'locy')):
    raw_df[_feature] = raw_df.groupby('ID')[_source].diff().fillna(0)
# Straight-line length of each step.
raw_df['distance'] = np.sqrt(raw_df['dx'] ** 2 + raw_df['dy'] ** 2)
# Zero durations are replaced by 1 so the division below never divides by zero.
# NOTE(review): the factor 60 looks like a time-unit conversion - confirm the units of 'time'.
_elapsed = raw_df['duration'].replace(0, 1)
raw_df['speed'] = 60 * raw_df['distance'] / _elapsed
# Acceleration: change in speed since the previous record of the SAME trajectory,
# divided by the elapsed time between the two records.
# The speed difference is taken per 'ID' so that the first record of a trajectory
# gets 0 instead of a spurious value derived from the last record of the previous
# trajectory. Zero durations are replaced by 1 to avoid division by zero.
raw_df['acc'] = raw_df.groupby('ID')['speed'].diff().fillna(0) / raw_df['duration'].replace(0, 1)
# Cosine of the turning angle at each record, computed per trajectory ('ID'):
# the angle between the incoming step (previous -> current record) and the
# outgoing step (current -> next record).
# Group-wise diff/shift keep the original row index, so the result aligns with
# raw_df regardless of how the rows are ordered or how many trajectories exist.
_by_id = raw_df.groupby('ID')
_in_dx = _by_id['locx'].diff().fillna(0)
_in_dy = _by_id['locy'].diff().fillna(0)
_out_dx = (_by_id['locx'].shift(-1) - raw_df['locx']).fillna(0)
_out_dy = (_by_id['locy'].shift(-1) - raw_df['locy']).fillna(0)
_dot = _in_dx * _out_dx + _in_dy * _out_dy
_norm = np.sqrt(_in_dx ** 2 + _in_dy ** 2) * np.sqrt(_out_dx ** 2 + _out_dy ** 2)
# A zero norm means there is no incoming or no outgoing movement (first/last
# record of a trajectory, or a stationary step); the cosine is defined as 1 there.
raw_df['cos'] = (_dot / _norm.replace(0, 1)).where(_norm != 0, 1.0)
# method with route name to get ['mode']
"""H = []
O = []
R = []
for each_road in artificial_net:
if each_road['name'].startswith('H'): H.append(each_road['id'])
elif each_road['name'].startswith('O'): O.append(each_road['id'])
else: R.append(each_road['id'])
raw_df['mode'] = raw_df['route_id'].apply(lambda x: 1 if x in H else (2 if x in O else 3))
def isClose(gridmap,x,y,mode):
return gridmap.is_close(x,y,mode)
raw_df['GG'] = raw_df.apply(lambda x: isClose(artificial_map,int(x['locx']),int(x['locy']),1),axis=1)
raw_df['GSD'] = raw_df.apply(lambda x: isClose(artificial_map,int(x['locx']),int(x['locy']),2),axis=1)
raw_df['TG'] = raw_df.apply(lambda x: isClose(artificial_map,int(x['locx']),int(x['locy']),3),axis=1)
raw_df.to_csv('data/artificial_traj_features_2.csv', index=False)"""
# method without route name:
# Bit position, inside a neighbour code, of the flag belonging to each mode name.
_MODE_BIT_SHIFT = {'GG': 3, 'GSD': 2, 'TG': 1, 'TS': 6}


def isClose(net, coord, mode):
    """Return 1 if any code stored under ``coord`` has the flag bit for ``mode`` set.

    Args:
        net: container supporting ``in`` and indexing by ``coord``; each entry
            is an iterable of integer codes.
        coord: key to look up in ``net``.
        mode: one of 'GG', 'GSD', 'TG', 'TS'.

    Returns:
        1 when at least one code under ``coord`` has the bit for ``mode`` set;
        0 when ``coord`` is absent, ``mode`` is not recognised, or no code
        has the bit set.
    """
    if coord not in net:
        return 0
    shift = _MODE_BIT_SHIFT.get(mode)
    if shift is None:
        # Unrecognised mode names never match any code.
        return 0
    return int(any((code >> shift) & 1 for code in net[coord]))
def encodeMode(mode: str):
    """Map a mode name to its integer label.

    'GG' -> 1, 'GSD' -> 2, 'TG' -> 3, 'TS' -> 4; any other value -> 0.
    """
    labels = {'GG': 1, 'GSD': 2, 'TG': 3, 'TS': 4}
    return labels.get(mode, 0)
# Load the lookup passed to isClose as `net` (indexed by integer (locx, locy) pairs).
# NOTE: pickle.load can execute arbitrary code; only load a trusted, locally produced file.
with open('data/GridModesAdjacentRes.pkl', 'rb') as net_file:
    real_net = pickle.load(net_file)


def _grid_cell(row):
    """Return the integer (locx, locy) pair of one trajectory row."""
    return (int(row['locx']), int(row['locy']))


# One 0/1 proximity column per mode, added in this order: TG, GSD, GG, TS.
for _flag in ('TG', 'GSD', 'GG', 'TS'):
    raw_df[_flag] = raw_df.apply(
        lambda row, flag=_flag: isClose(real_net, _grid_cell(row), flag),
        axis=1,
    )
# Replace mode names with their integer labels.
raw_df['mode'] = raw_df['mode'].apply(encodeMode)
# Records with zero speed are labelled 0 regardless of their mode.
raw_df.loc[raw_df['speed'] == 0, 'mode'] = 0
raw_df.to_csv('data/realWorldMixedFeatures.csv')