import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Global plot configuration. SimHei is presumably chosen so the Chinese
# titles/legends used below render; unicode_minus=False makes matplotlib draw
# negative signs with the ASCII hyphen.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
plt.style.use('ggplot')

# Load the training data and take a first look at its contents.
df = pd.read_csv('train.csv')
df.head(2)
print(df.shape, '\n')
df.info()
df.describe()
# Derive calendar features from the raw 'datetime' strings, which are expected
# to look like 'YYYY-MM-DD HH:MM:SS' (date and time separated by whitespace).
# Vectorised .str/.dt accessors replace the original per-row lambdas; the
# resulting columns and their order (date, hour, day, month) are unchanged.
_datetime_parts = df['datetime'].str.split()

# Calendar date, parsed into a real datetime column.
df['date'] = pd.to_datetime(_datetime_parts.str[0], format='%Y-%m-%d')
# Hour of day, kept as the text before the first ':' (not converted to int)
# so the grouping and tick labels in later cells behave as before.
df['hour'] = _datetime_parts.str[1].str.split(':').str[0]
del df['datetime']

# Day of week as reported by pandas (.dt.weekday) and month number.
df['day'] = df['date'].dt.weekday
df['month'] = df['date'].dt.month

# info() prints its report itself and returns None, so it is not wrapped in
# print() (that emitted a stray 'None' line).
df.info()
df.head(2)
# Average rentals per hour of day: working days vs. non-working days.
plt.figure(figsize=(15,6))
# NOTE(review): the original two plot calls were interleaved by line wrapping;
# green for working days and yellow for non-working days is reconstructed from
# the order in which the fragments appeared - confirm against the notebook.
for _workingday_flag, _line_color in ((1, 'g'), (0, 'y')):
    _subset = df[df['workingday'] == _workingday_flag]
    # Select 'count' before aggregating so only that column is averaged.
    _subset.groupby('hour')['count'].mean().plot(linestyle='-',
                                                  linewidth=2,
                                                  color=_line_color)
# One label per distinct hour. Passing the whole df['hour'] column as labels
# does not match the number of tick positions.
_hour_labels = sorted(df['hour'].unique())
plt.xticks(range(len(_hour_labels)), _hour_labels, fontsize=15)
plt.yticks(fontsize=15)
plt.legend(['工作日','非工作日'],fontsize=15)
plt.title(u'一天各时段用车人数 (工作日 VS 非工作日)',fontsize=20)
plt.show()
# Average rentals for each day of the week, drawn as a bar chart with the
# (truncated) value written next to every bar.
plt.figure(figsize=(10,6))
# Aggregate once, and only the 'count' column: averaging every column would
# also try to average the text 'hour' column.
_count_by_day = df.groupby('day')['count'].mean()
plt.bar(range(len(_count_by_day)), _count_by_day, width=0.3)
plt.ylim([180,200])
plt.title('一周内单车需求量',fontsize=20)
plt.legend(['人数'],fontsize=15)
for _position, _mean_count in enumerate(_count_by_day):
    # Small offsets place the label roughly over the bar, just above its top.
    plt.text(_position - 0.12, _mean_count + 0.3, '%i' % _mean_count,
             va='center', fontsize=15)
# Average rentals per calendar month.
plt.figure(figsize=(15,6))
# Pick 'count' before taking the mean so the text 'hour' column is never
# aggregated.
df.groupby('month')['count'].mean().plot(marker='o')
plt.xticks(range(0,13))
plt.title('不同月份用车情况对比',fontsize=20)
plt.legend(['人数'],fontsize=15)
# Average monthly rentals, one line per year (2011 first, then 2012, matching
# the legend order below).
plt.figure(figsize=(15,6))
for _year in (2011, 2012):
    # 'date' is a datetime column, so the year comes straight from .dt.year.
    _rows_in_year = df[df['date'].dt.year == _year]
    _rows_in_year.groupby('month')['count'].mean().plot()
plt.xticks(range(0,13))
plt.legend(['2011 年','2012 年'],fontsize=15)
# Share of all rentals made by registered users and by casual users, shown as
# a pie chart with the registered slice pulled slightly out.
df_registered, df_casual = (
    df[_column].sum() / df['count'].sum()
    for _column in ('registered', 'casual')
)

plt.figure(figsize=(15,6))
plt.axis('equal')
plt.pie(
    x=[df_registered, df_casual],
    autopct='%.1f%%',
    textprops={'fontsize': 15},
    explode=[0.08, 0],
    shadow=True,
)
plt.legend(['会员','非会员'], fontsize=12, loc='upper left')
# Average rentals per hour of day: registered users vs. casual users
# (plotted in that order, matching the legend).
plt.figure(figsize=(15,6))
for _user_column in ('registered', 'casual'):
    df.groupby('hour')[_user_column].mean().plot()
# One label per distinct hour. Passing the whole df['hour'] column as labels
# does not match the number of tick positions.
_hour_labels = sorted(df['hour'].unique())
plt.xticks(range(len(_hour_labels)), _hour_labels, fontsize=15)
plt.legend(['会员','非会员'],fontsize = 15)
plt.title('24 小时单车使用情况 (会员 VS 非会员)',fontsize=20)
plt.show()
# Average rentals per day of week: registered users vs. casual users.
# The column is selected before averaging so that the text 'hour' column is
# never aggregated.
_grouped_by_day = df.groupby('day')
df_week_registered = _grouped_by_day['registered'].mean()
# NOTE: the misspelled name 'df_week_casuual' is kept on purpose in case later
# cells refer to it.
df_week_casuual = _grouped_by_day['casual'].mean()
plt.figure(figsize=(15,6))
df_week_registered.plot()
df_week_casuual.plot()
plt.legend(['会员','非会员'])
# Average rentals per hour of day, one line per season code. The lines are
# drawn for season 1, 2, 3, 4 in that order, which is the order of the legend
# entries below.
plt.figure(figsize=(15,6))
for _season in (1, 2, 3, 4):
    # Select 'count' before aggregating so only that column is averaged.
    df[df['season'] == _season].groupby('hour')['count'].mean().plot()
plt.legend(['春','夏','秋','冬'],fontsize=14)
# One label per distinct hour. Passing the whole df['hour'] column as labels
# does not match the number of tick positions.
_hour_labels = sorted(df['hour'].unique())
plt.xticks(range(len(_hour_labels)), _hour_labels, fontsize=15)
plt.title('季节与用车量的关系',fontsize=20)
plt.show()