import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
from bs4 import BeautifulSoup # library for web scrapping  

#!conda install -c conda-forge 地理编码器 --yes
import geocoder

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# 用于显示图像的库
from IPython.display import Image 
from IPython.core.display import HTML 
    
# 将 json 文件转换为 pandas dataframe 库
from pandas.io.json import json_normalize

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


#Foursquare 是一家技术公司，最初以其同名的位置数据和推荐服务应用程序而闻名。Foursquare 应用程序允许用户“签到”到他们访问的不同地点，并分享这些信息与朋友。
CLIENT_ID = 'R01LINGO2WC45KLRLKT3ZHU2QENAO2IPRK2N2ELOHRNK4P3K' # your Foursquare ID
CLIENT_SECRET = '4JT1TWRMXMPLX5IOKNBAFU3L3ARXK4D5JJDPFK1CLRZM2ZVW' # your Foursquare Secret

VERSION = '20180604'
LIMIT = 30

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: R01LINGO2WC45KLRLKT3ZHU2QENAO2IPRK2N2ELOHRNK4P3K
CLIENT_SECRET:4JT1TWRMXMPLX5IOKNBAFU3L3ARXK4D5JJDPFK1CLRZM2ZVW


# 读入数据
df = pd.read_csv("london_crime_by_lsoa.csv")


# 查看数据集的顶行
df.head()


# 仅采用最近一年（2016 年）并丢弃其余年份
df.drop(df.index[df['year'] != 2016], inplace = True)

# 删除犯罪值为空的所有整体
df = df[df.value != 0]

# 重置索引并删除之前的索引
df = df.reset_index(drop=True)


# 数据框的形状
df.shape

(392042, 7)


# 查看数据集的顶部
df.head()


df.columns = ['LSOA_Code', 'Borough','Major_Category','Minor_Category','No_of_Crimes','Year','Month']
df.head()


# 查看数据集的信息
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392042 entries, 0 to 392041
Data columns (total 7 columns):
 #   Column          Non-Null Count   Dtype 
---  ------          --------------   ----- 
 0   LSOA_Code       392042 non-null  object
 1   Borough         392042 non-null  object
 2   Major_Category  392042 non-null  object
 3   Minor_Category  392042 non-null  object
 4   No_of_Crimes    392042 non-null  int64 
 5   Year            392042 non-null  int64 
 6   Month           392042 non-null  int64 
dtypes: int64(3), object(4)
memory usage: 20.9+ MB


df['Borough'].value_counts()

Lambeth                   17605
Southwark                 16560
Croydon                   16254
Newham                    15622
Ealing                    15284
Tower Hamlets             15219
Brent                     14980
Barnet                    14668
Hackney                   14392
Lewisham                  14235
Haringey                  14202
Enfield                   13658
Wandsworth                13498
Westminster               13383
Islington                 13116
Greenwich                 12750
Camden                    12632
Hillingdon                12417
Hounslow                  12316
Waltham Forest            12121
Bromley                   11980
Redbridge                 11490
Hammersmith and Fulham    10281
Barking and Dagenham       9784
Havering                   9699
Kensington and Chelsea     9653
Harrow                     8257
Bexley                     8245
Merton                     8223
Richmond upon Thames       7199
Sutton                     6823
Kingston upon Thames       5374
City of London              122
Name: Borough, dtype: int64


df['Major_Category'].value_counts()

Theft and Handling             129159
Violence Against the Person    123050
Criminal Damage                 48584
Burglary                        43020
Drugs                           21782
Robbery                         14889
Other Notifiable Offences       11558
Name: Major_Category, dtype: int64


London_crime = pd.pivot_table(df,values=['No_of_Crimes'],
                               index=['Borough'],
                               columns=['Major_Category'],
                               aggfunc=np.sum,fill_value=0)
London_crime.head()


# 重置索引
London_crime.reset_index(inplace = True)


# 每个行政区的犯罪总数
London_crime['Total'] = London_crime.sum(axis=1, numeric_only=True)
London_crime.head(33)

C:\Users\1\AppData\Local\Temp\ipykernel_1864\3008334827.py:2: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  London_crime['Total'] = London_crime.sum(axis=1)


London_crime.columns = London_crime.columns.map(''.join)
London_crime.head()


London_crime.columns = ['Borough','Burglary', 'Criminal Damage','Drugs','Other Notifiable Offences',
                        'Robbery','Theft and Handling','Violence Against the Person','Total']
London_crime.head()


# 数据集的形状
London_crime.shape

(33, 9)


# 查看数据框中的列
# London_crime.columns.tolist()


# 从互联网获取数据
wikipedia_link='https://en.wikipedia.org/wiki/List_of_London_boroughs'
raw_wikipedia_page= requests.get(wikipedia_link).text

# 使用 beautiful soup 来解析 HTML/XML 代码。
soup = BeautifulSoup(raw_wikipedia_page,'xml')
print(soup.prettify())


# 提取该网页内的原始表
table = soup.find_all('table', {'class':'wikitable sortable'})
print(table)


London_table = pd.read_html(str(table[0]), index_col=None, header=0)[0]
London_table.head()


# 读入第二个表
London_table1 = pd.read_html(str(table[1]), index_col=None, header=0)[0]

# 重命名列以匹配上一个表以追加表。

London_table1.columns = ['Borough','Inner','Status','Local authority','Political control',
                         'Headquarters','Area (sq mi)','Population (2013 est)[1]','Co-ordinates','Nr. in map']

# 查看表
London_table1


# 将保持一个连续的索引值
# 跨越新附加数据框中的行。

London_table = London_table.append(London_table1, ignore_index = True) 
London_table.head()

C:\Users\1\AppData\Local\Temp\ipykernel_1864\1307134375.py:4: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  London_table = London_table.append(London_table1, ignore_index = True)


London_table.tail()


London_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 53 entries, 0 to 52
Data columns (total 11 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Borough                   53 non-null     object 
 1   Inner                     20 non-null     object 
 2   Status                    7 non-null      object 
 3   Local authority           35 non-null     object 
 4   Political control         35 non-null     object 
 5   Headquarters              35 non-null     object 
 6   Area (sq mi)              35 non-null     object 
 7   Population (2019 est)     34 non-null     object 
 8   Co-ordinates              35 non-null     object 
 9   Nr. in map                35 non-null     object 
 10  Population (2013 est)[1]  1 non-null      float64
dtypes: float64(1), object(10)
memory usage: 4.7+ KB


London_table = London_table.replace('note 1','', regex=True) 
London_table = London_table.replace('note 2','', regex=True) 
London_table = London_table.replace('note 3','', regex=True) 
London_table = London_table.replace('note 4','', regex=True) 
London_table = London_table.replace('note 5','', regex=True) 

# 查看数据集的顶部
London_table.head()


type(London_table)

pandas.core.frame.DataFrame


# 数据框的形状
London_table.shape

(53, 11)


set(df.Borough) - set(London_table.Borough)

{'Barking and Dagenham', 'Greenwich', 'Hammersmith and Fulham'}


print("The index of first borough is",London_table.index[London_table['Borough'] == 'Barking and Dagenham []'].tolist())
print("The index of second borough is",London_table.index[London_table['Borough'] == 'Greenwich []'].tolist())
print("The index of third borough is",London_table.index[London_table['Borough'] == 'Hammersmith and Fulham []'].tolist())

The index of first borough is []
The index of second borough is []
The index of third borough is []


London_table.iloc[0,0] = 'Barking and Dagenham'
London_table.iloc[9,0] = 'Greenwich'
London_table.iloc[11,0] = 'Hammersmith and Fulham'


set(df.Borough) - set(London_table.Borough)

set()


Ld_crime = pd.merge(London_crime, London_table, on='Borough')
Ld_crime.head(10)


Ld_crime.shape

(34, 19)


set(df.Borough) - set(Ld_crime.Borough)

set()


# 数据框的列名称列表
list(Ld_crime)

['Borough',
 'Burglary',
 'Criminal Damage',
 'Drugs',
 'Other Notifiable Offences',
 'Robbery',
 'Theft and Handling',
 'Violence Against the Person',
 'Total',
 'Inner',
 'Status',
 'Local authority',
 'Political control',
 'Headquarters',
 'Area (sq mi)',
 'Population (2019 est)',
 'Co-ordinates',
 'Nr. in map',
 'Population (2013 est)[1]']


columnsTitles = ['Borough','Local authority','Political control','Headquarters',
                 'Area (sq mi)','Population (2013 est)[1]',
                 'Inner','Status',
                 'Burglary','Criminal Damage','Drugs','Other Notifiable Offences',
                 'Robbery','Theft and Handling','Violence Against the Person','Total','Co-ordinates']

Ld_crime = Ld_crime.reindex(columns=columnsTitles)

Ld_crime = Ld_crime[['Borough','Local authority','Political control','Headquarters',
                 'Area (sq mi)','Population (2013 est)[1]','Co-ordinates',
                 'Burglary','Criminal Damage','Drugs','Other Notifiable Offences',
                 'Robbery','Theft and Handling','Violence Against the Person','Total']]

Ld_crime.head()


London_crime.describe()


# 使用内联后端在浏览器中生成绘图
%matplotlib inline 

import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.style.use('ggplot') # optional: for ggplot-like style

# 检查 Matplotlib 的最新版本
print ('Matplotlib version: ', mpl.__version__) # >= 2.0.0

# Matplotlib 和相关绘图模块
import matplotlib.cm as cm
import matplotlib.colors as colors

Matplotlib version:  3.7.1


Ld_crime.columns = list(map(str, Ld_crime.columns))

# 现在让我们检查一下列标签类型
all(isinstance(column, str) for column in Ld_crime.columns)

True


Ld_crime.sort_values(['Total'], ascending = False, axis = 0, inplace = True )

df_top5 = Ld_crime.head() 
df_top5


df_tt = df_top5[['Borough','Total']]

df_tt.set_index('Borough',inplace = True)

ax = df_tt.plot(kind='bar', figsize=(10, 6), rot=0)

ax.set_ylabel('Number of Crimes') # add to x-label to the plot
ax.set_xlabel('Borough') # add y-label to the plot
ax.set_title('London Boroughs with the Highest no. of crime') # add title to the plot

# 创建一个显示百分比的函数。

for p in ax.patches:
    ax.annotate(np.round(p.get_height(),decimals=2), 
                (p.get_x()+p.get_width()/2., p.get_height()), 
                ha='center', 
                va='center', 
                xytext=(0, 10), 
                textcoords='offset points',
                fontsize = 14
               )

plt.show()


Ld_crime.sort_values(['Total'], ascending = True, axis = 0, inplace = True )

df_bot5 = Ld_crime.head() 
df_bot5


df_bt = df_bot5[['Borough','Total']]

df_bt.set_index('Borough',inplace = True)

ax = df_bt.plot(kind='bar', figsize=(10, 6), rot=0)

ax.set_ylabel('Number of Crimes') # add to x-label to the plot
ax.set_xlabel('Borough') # add y-label to the plot
ax.set_title('London Boroughs with the least no. of crime') # add title to the plot

# 创建一个显示百分比的函数。

for p in ax.patches:
    ax.annotate(np.round(p.get_height(),decimals=2), 
                (p.get_x()+p.get_width()/2., p.get_height()), 
                ha='center', 
                va='center', 
                xytext=(0, 10), 
                textcoords='offset points',
                fontsize = 14
               )

plt.show()


df_col = df_bot5[df_bot5['Borough'] == 'City of London']
df_col = df_col[['Borough','Total','Area (sq mi)','Population (2013 est)[1]']]
df_col


df_bc1 =  df_bot5[df_bot5['Borough'] == 'Kingston upon Thames']

df_bc = df_bc1[['Borough','Burglary','Criminal Damage','Drugs','Other Notifiable Offences',
                 'Robbery','Theft and Handling','Violence Against the Person']]


df_bc.set_index('Borough',inplace = True)

ax = df_bc.plot(kind='bar', figsize=(10, 6), rot=0)

ax.set_ylabel('Number of Crimes') # add to x-label to the plot
ax.set_xlabel('Borough') # add y-label to the plot
ax.set_title('London Boroughs with the least no. of crime') # add title to the plot

# 创建一个显示百分比的函数。

for p in ax.patches:
    ax.annotate(np.round(p.get_height(),decimals=2), 
                (p.get_x()+p.get_width()/2., p.get_height()), 
                ha='center', 
                va='center', 
                xytext=(0, 10), 
                textcoords='offset points',
                fontsize = 14
               )

plt.show()


Neighborhood = ['Berrylands','Canbury','Chessington','Coombe','Hook','Kingston upon Thames',
'Kingston Vale','Malden Rushett','Motspur Park','New Malden','Norbiton',
'Old Malden','Seething Wells','Surbiton','Tolworth']

Borough = ['Kingston upon Thames','Kingston upon Thames','Kingston upon Thames','Kingston upon Thames',
          'Kingston upon Thames','Kingston upon Thames','Kingston upon Thames','Kingston upon Thames',
          'Kingston upon Thames','Kingston upon Thames','Kingston upon Thames','Kingston upon Thames',
          'Kingston upon Thames','Kingston upon Thames','Kingston upon Thames']

Latitude = ['','','','','','','','','','','','','','','']
Longitude = ['','','','','','','','','','','','','','','']

df_neigh = {'Neighborhood': Neighborhood,'Borough':Borough,'Latitude': Latitude,'Longitude':Longitude}
kut_neig = pd.DataFrame(data=df_neigh, columns=['Neighborhood', 'Borough', 'Latitude', 'Longitude'], index=None)

kut_neig


Latitude = []
Longitude = []

for i in range(len(Neighborhood)):
    address = '{},London,United Kingdom'.format(Neighborhood[i])
    geolocator = Nominatim(user_agent="London_agent")
    location = geolocator.geocode(address)
    Latitude.append(location.latitude)
    Longitude.append(location.longitude)
print(Latitude, Longitude)

[51.3937811, 51.417250100000004, 51.358336, 51.4194499, 51.3678984, 51.4129277, 51.43185, 51.3410523, 51.3909852, 51.4053347, 51.4099994, 51.382484, 51.3926421, 51.3937557, 51.3788758] [-0.2848024, -0.30563059667487563, -0.2986216, -0.2653985, -0.3071453, -0.3018577, -0.2581379, -0.3190757, -0.2488979, -0.2634066, -0.2873963, -0.2590897, -0.3143662, -0.3033105, -0.2828604]


df_neigh = {'Neighborhood': Neighborhood,'Borough':Borough,'Latitude': Latitude,'Longitude':Longitude}
kut_neig = pd.DataFrame(data=df_neigh, columns=['Neighborhood', 'Borough', 'Latitude', 'Longitude'], index=None)

kut_neig


address = 'Berrylands, London, United Kingdom'

geolocator = Nominatim(user_agent="ld_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Berrylands, London are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Berrylands, London are 51.3937811, -0.2848024.


# 使用纬度和经度值创建纽约地图
map_lon = folium.Map(location=[latitude, longitude], zoom_start=12)

# 向地图添加标记
for lat, lng, borough, neighborhood in zip(kut_neig['Latitude'], kut_neig['Longitude'], kut_neig['Borough'], kut_neig['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_lon)  
    
map_lon


def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
# 创建API请求URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
# 发出 GET 请求
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
# 仅返回每个附近场地的相关信息
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


kut_venues = getNearbyVenues(names=kut_neig['Neighborhood'],
                                   latitudes=kut_neig['Latitude'],
                                   longitudes=kut_neig['Longitude']
                                  )

Berrylands
Canbury
Chessington
Coombe
Hook
Kingston upon Thames
Kingston Vale
Malden Rushett
Motspur Park
New Malden
Norbiton
Old Malden
Seething Wells
Surbiton
Tolworth


print(kut_venues.shape)
kut_venues.head()

(170, 7)


kut_venues.groupby('Neighborhood').count()


print('There are {} uniques categories.'.format(len(kut_venues['Venue Category'].unique())))

There are 61 uniques categories.


# 一种热编码
kut_onehot = pd.get_dummies(kut_venues[['Venue Category']], prefix="", prefix_sep="")

# 将邻域列添加回数据框
kut_onehot['Neighborhood'] = kut_venues['Neighborhood'] 

# 将邻域列移动到第一列
fixed_columns = [kut_onehot.columns[-1]] + list(kut_onehot.columns[:-1])
kut_onehot = kut_onehot[fixed_columns]

kut_onehot.head()


kut_grouped = kut_onehot.groupby('Neighborhood').mean().reset_index()
kut_grouped


kut_grouped.shape

(14, 62)


num_top_venues = 5

for hood in kut_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = kut_grouped[kut_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berrylands----
                   venue  freq
0        Nature Preserve  0.25
1                   Park  0.25
2               Bus Stop  0.25
3                   Café  0.25
4  Arts and Crafts Store  0.00


----Canbury----
               venue  freq
0                Pub  0.29
1  Indian Restaurant  0.07
2               Café  0.07
3   Asian Restaurant  0.07
4              Hotel  0.07


----Coombe----
                      venue  freq
0  Food and Beverage Retail   1.0
1     Arts and Crafts Store   0.0
2                Public Art   0.0
3          Kebab Restaurant   0.0
4         Korean Restaurant   0.0


----Hook----
                 venue  freq
0                  Pub  0.17
1  Fish and Chips Shop  0.17
2               Bakery  0.17
3          Supermarket  0.17
4        Grocery Store  0.17


----Kingston Vale----
                   venue  freq
0  Sporting Goods Retail  0.25
1          Sandwich Spot  0.25
2                    Bar  0.25
3          Grocery Store  0.25
4                  Plaza  0.00


----Kingston upon Thames----
                   venue  freq
0            Coffee Shop  0.10
1                   Café  0.07
2        Thai Restaurant  0.07
3       Sushi Restaurant  0.07
4  Arts and Crafts Store  0.03


----Malden Rushett----
           venue  freq
0            Pub   0.2
1  Grocery Store   0.2
2  Garden Center   0.2
3   Fuel Station   0.2
4     Restaurant   0.2


----Motspur Park----
                   venue  freq
0                   Park   0.5
1             Restaurant   0.5
2  Arts and Crafts Store   0.0
3             Public Art   0.0
4       Kebab Restaurant   0.0


----New Malden----
                venue  freq
0   Korean Restaurant  0.38
1   Indian Restaurant  0.12
2                 Bar  0.12
3         Supermarket  0.12
4  Chinese Restaurant  0.12


----Norbiton----
                 venue  freq
0    Indian Restaurant  0.12
1           Restaurant  0.08
2                  Pub  0.08
3   Italian Restaurant  0.08
4  Fried Chicken Joint  0.04


----Old Malden----
          venue  freq
0  Rail Station  0.17
1   Coffee Shop  0.17
2    Steakhouse  0.17
3    Restaurant  0.17
4        Retail  0.17


----Seething Wells----
               venue  freq
0  Indian Restaurant  0.15
1                Pub  0.15
2               Café  0.15
3        Coffee Shop  0.10
4           Pet Café  0.05


----Surbiton----
           venue  freq
0    Coffee Shop  0.23
1            Pub  0.10
2       Pharmacy  0.07
3  Grocery Store  0.07
4    Supermarket  0.07


----Tolworth----
               venue  freq
0      Grocery Store  0.20
1         Restaurant  0.13
2  Indian Restaurant  0.07
3      Sandwich Spot  0.07
4       Rail Station  0.07


def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]


num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# 根据顶级场馆数量创建列
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# 创建一个新的数据框
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = kut_grouped['Neighborhood']

for ind in np.arange(kut_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(kut_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


# 从聚类阶段导入 k-means
from sklearn.cluster import KMeans

# 设置簇数
kclusters = 5

kut_grouped_clustering = kut_grouped.drop('Neighborhood', 1)

# 运行 k 均值聚类
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kut_grouped_clustering)

# 检查为数据帧中的每一行生成的簇标签
kmeans.labels_[0:10]

C:\Users\1\AppData\Local\Temp\ipykernel_1864\4283508076.py:7: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.
  kut_grouped_clustering = kut_grouped.drop('Neighborhood', 1)
d:\anaconda\envs\pytorch2.0\Lib\site-packages\sklearn\cluster\_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
d:\anaconda\envs\pytorch2.0\Lib\site-packages\sklearn\cluster\_kmeans.py:1382: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
  warnings.warn(

array([0, 1, 2, 1, 3, 1, 1, 4, 1, 1])


# 添加聚类标签
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

kut_merged = kut_neig

# 将 toronto_grouped 与 toronto_data 合并以添加每个社区的纬度/经度
kut_merged = kut_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

kut_merged.head() # check the last columns!


kut_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Neighborhood            15 non-null     object 
 1   Borough                 15 non-null     object 
 2   Latitude                15 non-null     float64
 3   Longitude               15 non-null     float64
 4   Cluster Labels          14 non-null     float64
 5   1st Most Common Venue   14 non-null     object 
 6   2nd Most Common Venue   14 non-null     object 
 7   3rd Most Common Venue   14 non-null     object 
 8   4th Most Common Venue   14 non-null     object 
 9   5th Most Common Venue   14 non-null     object 
 10  6th Most Common Venue   14 non-null     object 
 11  7th Most Common Venue   14 non-null     object 
 12  8th Most Common Venue   14 non-null     object 
 13  9th Most Common Venue   14 non-null     object 
 14  10th Most Common Venue  14 non-null     object 
dtypes: float64(3), object(12)
memory usage: 1.9+ KB


# 删除具有 NaN 值的行
kut_merged.dropna(inplace = True)


kut_merged.shape

(14, 15)


kut_merged['Cluster Labels'] = kut_merged['Cluster Labels'].astype(int)


kut_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 14 entries, 0 to 14
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Neighborhood            14 non-null     object 
 1   Borough                 14 non-null     object 
 2   Latitude                14 non-null     float64
 3   Longitude               14 non-null     float64
 4   Cluster Labels          14 non-null     int32  
 5   1st Most Common Venue   14 non-null     object 
 6   2nd Most Common Venue   14 non-null     object 
 7   3rd Most Common Venue   14 non-null     object 
 8   4th Most Common Venue   14 non-null     object 
 9   5th Most Common Venue   14 non-null     object 
 10  6th Most Common Venue   14 non-null     object 
 11  7th Most Common Venue   14 non-null     object 
 12  8th Most Common Venue   14 non-null     object 
 13  9th Most Common Venue   14 non-null     object 
 14  10th Most Common Venue  14 non-null     object 
dtypes: float64(2), int32(1), object(12)
memory usage: 1.7+ KB


# 创建地图
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11.5)

# 为簇设置颜色方案
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# 在地图上添加标记
markers_colors = []
for lat, lon, poi, cluster in zip(kut_merged['Latitude'], kut_merged['Longitude'], kut_merged['Neighborhood'], kut_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.5).add_to(map_clusters)
       
map_clusters


kut_merged[kut_merged['Cluster Labels'] == 0]


kut_merged[kut_merged['Cluster Labels'] == 1]


kut_merged[kut_merged['Cluster Labels'] == 2]


kut_merged[kut_merged['Cluster Labels'] == 3]


kut_merged[kut_merged['Cluster Labels'] == 4]

	No_of_Crimes
Major_Category	Burglary	Criminal Damage	Drugs	Other Notifiable Offences	Robbery	Theft and Handling	Violence Against the Person
Borough
Barking and Dagenham	1287	1949	919	378	534	5607	6067
Barnet	3402	2183	906	499	464	9731	7499
Bexley	1123	1673	646	294	209	4392	4503
Brent	2631	2280	2096	536	919	9026	9205
Bromley	2214	2202	728	417	369	7584	6650

	Borough	No_of_Crimes							Total
Major_Category		Burglary	Criminal Damage	Drugs	Other Notifiable Offences	Robbery	Theft and Handling	Violence Against the Person
0	Barking and Dagenham	1287	1949	919	378	534	5607	6067	16741
1	Barnet	3402	2183	906	499	464	9731	7499	24684
2	Bexley	1123	1673	646	294	209	4392	4503	12840
3	Brent	2631	2280	2096	536	919	9026	9205	26693
4	Bromley	2214	2202	728	417	369	7584	6650	20164
5	Camden	2652	1935	1493	490	899	14088	7626	29183
6	City of London	2	2	10	6	4	129	25	178
7	Croydon	2738	3219	1367	718	1139	9229	10302	28712
8	Ealing	2492	2562	1355	613	669	10040	9396	27127
9	Enfield	2541	2136	1063	492	807	8037	7409	22485
10	Greenwich	1780	2476	867	521	486	8010	8590	22730
11	Hackney	2719	1981	1353	499	1030	11851	8832	28265
12	Hammersmith and Fulham	1531	1408	1321	474	397	8925	6118	20174
13	Haringey	2474	2233	1540	551	1248	10554	8574	27174
14	Harrow	1994	1212	473	267	377	4537	4293	13153
15	Havering	1826	1804	718	389	311	5919	5936	16903
16	Hillingdon	2229	2470	1042	1305	420	9045	7830	24341
17	Hounslow	1808	2213	1086	665	392	8682	7405	22251
18	Islington	2051	1903	1777	508	936	12077	7840	27092
19	Kensington and Chelsea	1449	1051	1680	326	523	9855	4696	19580
20	Kingston upon Thames	879	1054	743	189	121	3803	3194	9983
21	Lambeth	3087	2764	2738	635	1196	13155	10496	34071
22	Lewisham	2071	2351	1617	485	869	7702	8809	23904
23	Merton	1419	1418	466	249	283	4894	4026	12755
24	Newham	2115	2496	1684	713	1472	11964	9646	30090
25	Redbridge	1997	1650	1017	381	599	7447	6411	19502
26	Richmond upon Thames	1359	1148	320	217	106	4769	3155	11074
27	Southwark	2946	2621	1838	494	1317	12946	9474	31636
28	Sutton	1233	1316	461	253	165	3516	3714	10658
29	Tower Hamlets	2794	2357	1629	678	1234	10953	9608	29253
30	Waltham Forest	1873	1989	1042	444	602	7357	7409	20716
31	Wandsworth	2351	1836	870	415	609	10789	6809	23679
32	Westminster	3218	2179	2049	708	1822	27520	10834	48330

	Borough	No_of_CrimesBurglary	No_of_CrimesCriminal Damage	No_of_CrimesDrugs	No_of_CrimesOther Notifiable Offences	No_of_CrimesRobbery	No_of_CrimesTheft and Handling	No_of_CrimesViolence Against the Person	Total
0	Barking and Dagenham	1287	1949	919	378	534	5607	6067	16741
1	Barnet	3402	2183	906	499	464	9731	7499	24684
2	Bexley	1123	1673	646	294	209	4392	4503	12840
3	Brent	2631	2280	2096	536	919	9026	9205	26693
4	Bromley	2214	2202	728	417	369	7584	6650	20164

	Borough	Burglary	Criminal Damage	Drugs	Other Notifiable Offences	Robbery	Theft and Handling	Violence Against the Person	Total
0	Barking and Dagenham	1287	1949	919	378	534	5607	6067	16741
1	Barnet	3402	2183	906	499	464	9731	7499	24684
2	Bexley	1123	1673	646	294	209	4392	4503	12840
3	Brent	2631	2280	2096	536	919	9026	9205	26693
4	Bromley	2214	2202	728	417	369	7584	6650	20164

	Borough	Inner	Status	Local authority	Political control	Headquarters	Area (sq mi)	Population (2019 est)	Co-ordinates	Nr. in map
0	Barking and Dagenham[note 1]	NaN	NaN	Barking and Dagenham London Borough Council	Labour	Town Hall, 1 Town Square	13.93	212906	.mw-parser-output .geo-default,.mw-parser-outp...	25
1	Barnet	NaN	NaN	Barnet London Borough Council	Labour	Barnet House, 2 Bristol Avenue, Colindale	33.49	395896	51°37′31″N 0°09′06″W / 51.6252°N 0.1517°W	31
2	Bexley	NaN	NaN	Bexley London Borough Council	Conservative	Civic Offices, 2 Watling Street	23.38	248287	51°27′18″N 0°09′02″E / 51.4549°N 0.1505°E	23
3	Brent	NaN	NaN	Brent London Borough Council	Labour	Brent Civic Centre, Engineers Way	16.70	329771	51°33′32″N 0°16′54″W / 51.5588°N 0.2817°W	12
4	Bromley	NaN	NaN	Bromley London Borough Council	Conservative	Civic Centre, Stockwell Close	57.97	332336	51°24′14″N 0°01′11″E / 51.4039°N 0.0198°E	20

	lsoa_code	borough	major_category	minor_category	year	month
0	E01001116	Croydon	Burglary	Burglary in Other Buildings	2016	11
1	E01001646	Greenwich	Violence Against the Person	Other violence	2016	11
2	E01000677	Bromley	Violence Against the Person	Other violence	2015	5
3	E01003774	Redbridge	Burglary	Burglary in Other Buildings	2016	3
4	E01004563	Wandsworth	Robbery	Personal Property	2008	6

	lsoa_code	borough	major_category	minor_category	value	year	month
0	E01004177	Sutton	Theft and Handling	Theft/Taking of Pedal Cycle	1	2016	8
1	E01000733	Bromley	Criminal Damage	Criminal Damage To Motor Vehicle	1	2016	4
2	E01003989	Southwark	Theft and Handling	Theft From Shops	4	2016	8
3	E01002276	Havering	Burglary	Burglary in a Dwelling	1	2016	8
4	E01003674	Redbridge	Drugs	Possession Of Drugs	2	2016	11

	Borough	Inner	Status	Local authority	Political control	Headquarters	Area (sq mi)	Population (2013 est)[1]	Co-ordinates	Nr. in map
0	City of London	( [note 5]	Sui generis; City; Ceremonial county	Corporation of London; Inner Temple; Middle Te...	Non-partisan	Guildhall	1.12	9721.0	51°30′56″N 0°05′32″W / 51.5155°N 0.0922°W	1.0
1	Map all coordinates using: OpenStreetMap	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	Download coordinates as: KML GPX (all coordina...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	.mw-parser-output .navbar{display:inline;font-...	.mw-parser-output .navbar{display:inline;font-...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	City of London Greater London London	City of London Greater London London	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
5	Regional	Greater London Authority London Assembly Mayor...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
6	Ceremonial	City of London Lord Mayor Lord Lieutenant Sher...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
7	London Councils	Boroughs (list) Barking and Dagenham Barnet Be...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
8	Boroughs (list)	Barking and Dagenham Barnet Bexley Brent Broml...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
9	Historical	Metropolitan Board of Works (MBW) 1855–1889 Lo...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	Borough	Inner	Status	Local authority	Political control	Headquarters	Area (sq mi)	Population (2019 est)	Co-ordinates	Nr. in map	Population (2013 est)[1]
0	Barking and Dagenham[]	NaN	NaN	Barking and Dagenham London Borough Council	Labour	Town Hall, 1 Town Square	13.93	212906	.mw-parser-output .geo-default,.mw-parser-outp...	25	NaN
1	Barnet	NaN	NaN	Barnet London Borough Council	Labour	Barnet House, 2 Bristol Avenue, Colindale	33.49	395896	51°37′31″N 0°09′06″W / 51.6252°N 0.1517°W	31	NaN
2	Bexley	NaN	NaN	Bexley London Borough Council	Conservative	Civic Offices, 2 Watling Street	23.38	248287	51°27′18″N 0°09′02″E / 51.4549°N 0.1505°E	23	NaN
3	Brent	NaN	NaN	Brent London Borough Council	Labour	Brent Civic Centre, Engineers Way	16.70	329771	51°33′32″N 0°16′54″W / 51.5588°N 0.2817°W	12	NaN
4	Bromley	NaN	NaN	Bromley London Borough Council	Conservative	Civic Centre, Stockwell Close	57.97	332336	51°24′14″N 0°01′11″E / 51.4039°N 0.0198°E	20	NaN

	Burglary	Criminal Damage	Drugs	Other Notifiable Offences	Robbery	Theft and Handling	Violence Against the Person	Total
count	33.000000	33.000000	33.000000	33.000000	33.000000	33.000000	33.000000	33.000000
mean	2069.242424	1941.545455	1179.212121	479.060606	682.666667	8913.121212	7041.848485	22306.696970
std	737.448644	625.207070	586.406416	223.298698	441.425366	4620.565054	2513.601551	8828.228749
min	2.000000	2.000000	10.000000	6.000000	4.000000	129.000000	25.000000	178.000000
25%	1531.000000	1650.000000	743.000000	378.000000	377.000000	5919.000000	5936.000000	16903.000000
50%	2071.000000	1989.000000	1063.000000	490.000000	599.000000	8925.000000	7409.000000	22730.000000
75%	2631.000000	2351.000000	1617.000000	551.000000	936.000000	10789.000000	8832.000000	27174.000000
max	3402.000000	3219.000000	2738.000000	1305.000000	1822.000000	27520.000000	10834.000000	48330.000000

	Borough	Local authority	Political control	Headquarters	Area (sq mi)	Population (2013 est)[1]	Co-ordinates	Burglary	Criminal Damage	Drugs	Other Notifiable Offences	Robbery	Theft and Handling	Violence Against the Person	Total
33	Westminster	Westminster City Council	Labour	Westminster City Hall, 64 Victoria Street	8.29	NaN	51°29′50″N 0°08′14″W / 51.4973°N 0.1372°W	3218	2179	2049	708	1822	27520	10834	48330
22	Lambeth	Lambeth London Borough Council	Labour	Lambeth Town Hall, Brixton Hill	10.36	NaN	51°27′39″N 0°06′59″W / 51.4607°N 0.1163°W	3087	2764	2738	635	1196	13155	10496	34071
28	Southwark	Southwark London Borough Council	Labour	160 Tooley Street	11.14	NaN	51°30′13″N 0°04′49″W / 51.5035°N 0.0804°W	2946	2621	1838	494	1317	12946	9474	31636
25	Newham	Newham London Borough Council	Labour	Newham Dockside, 1000 Dockside Road	13.98	NaN	51°30′28″N 0°02′49″E / 51.5077°N 0.0469°E	2115	2496	1684	713	1472	11964	9646	30090
30	Tower Hamlets	Tower Hamlets London Borough Council	Labour	Town Hall, Whitechapel Road	7.63	NaN	51°30′36″N 0°00′21″W / 51.5099°N 0.0059°W	2794	2357	1629	678	1234	10953	9608	29253

	Neighborhood	Borough
0	Berrylands	Kingston upon Thames
1	Canbury	Kingston upon Thames
2	Chessington	Kingston upon Thames
3	Coombe	Kingston upon Thames
4	Hook	Kingston upon Thames
5	Kingston upon Thames	Kingston upon Thames
6	Kingston Vale	Kingston upon Thames
7	Malden Rushett	Kingston upon Thames
8	Motspur Park	Kingston upon Thames
9	New Malden	Kingston upon Thames
10	Norbiton	Kingston upon Thames
11	Old Malden	Kingston upon Thames
12	Seething Wells	Kingston upon Thames
13	Surbiton	Kingston upon Thames
14	Tolworth	Kingston upon Thames

	Neighborhood	Borough	Latitude	Longitude
0	Berrylands	Kingston upon Thames	51.393781	-0.284802
1	Canbury	Kingston upon Thames	51.417250	-0.305631
2	Chessington	Kingston upon Thames	51.358336	-0.298622
3	Coombe	Kingston upon Thames	51.419450	-0.265398
4	Hook	Kingston upon Thames	51.367898	-0.307145
5	Kingston upon Thames	Kingston upon Thames	51.412928	-0.301858
6	Kingston Vale	Kingston upon Thames	51.431850	-0.258138
7	Malden Rushett	Kingston upon Thames	51.341052	-0.319076
8	Motspur Park	Kingston upon Thames	51.390985	-0.248898
9	New Malden	Kingston upon Thames	51.405335	-0.263407
10	Norbiton	Kingston upon Thames	51.409999	-0.287396
11	Old Malden	Kingston upon Thames	51.382484	-0.259090
12	Seething Wells	Kingston upon Thames	51.392642	-0.314366
13	Surbiton	Kingston upon Thames	51.393756	-0.303310
14	Tolworth	Kingston upon Thames	51.378876	-0.282860

	Neighborhood Latitude	Neighborhood Longitude	Venue	Venue Latitude	Venue Longitude	Venue Category
Neighborhood
Berrylands	4	4	4	4	4	4
Canbury	14	14	14	14	14	14
Coombe	1	1	1	1	1	1
Hook	6	6	6	6	6	6
Kingston Vale	4	4	4	4	4	4
Kingston upon Thames	30	30	30	30	30	30
Malden Rushett	5	5	5	5	5	5
Motspur Park	2	2	2	2	2	2
New Malden	8	8	8	8	8	8
Norbiton	25	25	25	25	25	25
Old Malden	6	6	6	6	6	6
Seething Wells	20	20	20	20	20	20
Surbiton	30	30	30	30	30	30
Tolworth	15	15	15	15	15	15

	Neighborhood	Arts and Crafts Store	Asian Restaurant	Bagel Shop	Bakery	Bar	Beer Bar	Bistro	Bowling Alley	Breakfast Spot	...	Rock Club	Sandwich Spot	Seafood Restaurant	Sporting Goods Retail	Steakhouse	Supermarket	Sushi Restaurant	Tea Room	Thai Restaurant	Turkish Restaurant
0	Berrylands	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
1	Canbury	0.000000	0.071429	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.071429	0.000000	0.000000	0.000000	0.000000
2	Coombe	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
3	Hook	0.000000	0.000000	0.00	0.166667	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.166667	0.000000	0.000000	0.000000	0.000000
4	Kingston Vale	0.000000	0.000000	0.00	0.000000	0.250000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.250000	0.00	0.25	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
5	Kingston upon Thames	0.033333	0.033333	0.00	0.033333	0.000000	0.033333	0.000000	0.000000	0.000000	...	0.033333	0.000000	0.00	0.00	0.000000	0.033333	0.066667	0.000000	0.066667	0.033333
6	Malden Rushett	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
7	Motspur Park	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
8	New Malden	0.000000	0.000000	0.00	0.000000	0.125000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.125000	0.000000	0.000000	0.000000	0.000000
9	Norbiton	0.000000	0.000000	0.04	0.000000	0.000000	0.000000	0.000000	0.000000	0.040000	...	0.000000	0.000000	0.04	0.00	0.000000	0.040000	0.000000	0.000000	0.000000	0.040000
10	Old Malden	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.166667	0.000000	0.000000	0.000000	0.000000	0.000000
11	Seething Wells	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.00	0.00	0.000000	0.050000	0.000000	0.000000	0.000000	0.000000
12	Surbiton	0.000000	0.000000	0.00	0.033333	0.033333	0.000000	0.033333	0.000000	0.033333	...	0.000000	0.000000	0.00	0.00	0.000000	0.066667	0.000000	0.033333	0.033333	0.000000
13	Tolworth	0.000000	0.000000	0.00	0.000000	0.000000	0.000000	0.000000	0.066667	0.000000	...	0.000000	0.066667	0.00	0.00	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000

	Neighborhood	1st Most Common Venue	2nd Most Common Venue	3rd Most Common Venue	4th Most Common Venue	5th Most Common Venue	6th Most Common Venue	7th Most Common Venue	8th Most Common Venue	9th Most Common Venue	10th Most Common Venue
0	Berrylands	Nature Preserve	Park	Bus Stop	Café	Arts and Crafts Store	Rail Station	Korean Restaurant	Market	Movie Theater	Pet Café
1	Canbury	Pub	Indian Restaurant	Café	Asian Restaurant	Hotel	Park	Fuel Station	Plaza	Fish and Chips Shop	Japanese Restaurant
2	Coombe	Food and Beverage Retail	Arts and Crafts Store	Public Art	Kebab Restaurant	Korean Restaurant	Market	Movie Theater	Nature Preserve	Park	Pet Café
3	Hook	Pub	Fish and Chips Shop	Bakery	Supermarket	Grocery Store	Fuel Station	Pet Supplies Store	Plaza	Pizzeria	Pharmacy
4	Kingston Vale	Sporting Goods Retail	Sandwich Spot	Bar	Grocery Store	Plaza	Pizzeria	Pharmacy	Pet Supplies Store	Pet Café	Arts and Crafts Store

简介：寻找最安全的居住区¶

数据¶

第 1 部分：预处数据集，显示 2008 年至 2016 年的伦敦犯罪事件¶

伦敦犯罪数据¶

导入必要的库¶

定义 Foursquare 凭证和版本¶

读入数据集¶

获取最新的犯罪率（2016 年）¶

更改列名称¶

每个行政区的犯罪总数¶

每个主要类别的犯罪总数¶

旋转表格以查看编号。每个行政区每个主要类别的犯罪数量¶

去掉多索引，这样更容易合并¶

重命名列¶

第 2 部分：从 Wikipedia 页面 抓取伦敦不同行政区的其他信息¶

将表格转换为数据框¶

网站上的第二个表包含附加自治市镇，即伦敦金融城¶

将数据框附加在一起¶

检查最后一行是否正确附加¶

查看数据集的信息¶

删除数据集中不需要的字符串¶

检查新建表的类型¶

检查两个数据框中的行政区是否匹配。¶

查找不匹配的行政区的索引¶

更改行政区名称以匹配其他数据框¶

检查两个数据集中的行政区名称是否匹配¶

我们可以将两个数据框组合在一起¶

重新排列列¶

方法论¶

探索性数据分析¶

数据的描述性统计¶

检查列名是否为字符串¶

按降序对犯罪总数进行排序，以查看犯罪数量最高的 5 个行政区¶

可视化犯罪率最高的五个行政区¶

我们会远离这些地方:)¶

按升序对犯罪总数进行排序，查看犯罪数量最多的 5 个行政区¶

可视化犯罪数量最少的五个行政区¶

根据维基百科页面，伦敦金融城是大伦敦的第 33 个主要区，但它不是伦敦的一个行政区。¶

因此，我们将重点关注下一个犯罪率最低的行政区，即泰晤士河畔金斯顿¶

可视化“泰晤士河畔金斯顿”行政区的不同类型犯罪¶

第 3 部分：创建伦敦最安全行政区的新数据集并生成其坐标。 <a名称=“第3部分”></a>¶

查找泰晤士河畔金斯顿社区中每个社区的坐标¶

获取英国伦敦 Berrylands 的坐标（泰晤士河畔金斯顿的中心街区）¶

可视化泰晤士河畔金斯顿自治市的社区¶

建模¶

创建一个函数来从每个 Neighborhood 中提取场地¶

一热编码¶

按邻域并取每个类别出现频率的平均值对行进行分组¶

创建场地数据框¶

使用 k 均值聚类将相似的邻域聚类在一起¶

可视化集群¶

分析¶

检查第一个簇¶

检查第二个集群¶

检查第三个集群¶

检查第四个簇¶

检查第五个簇¶

结果和讨论¶

结论¶

简介：寻找最安全的居住区 ¶

数据 ¶

第 1 部分：预处数据集，显示 2008 年至 2016 年的伦敦犯罪事件 ¶

第 2 部分：从 Wikipedia 页面抓取伦敦不同行政区的其他信息 ¶

方法论 ¶

探索性数据分析 ¶

建模 ¶

分析 ¶

结果和讨论 ¶

结论 ¶