# Confirm the data file is present by listing the current working directory
import os
print(os.listdir(os.curdir))
# import all major libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# render matplotlib figures inline in the notebook output
%matplotlib inline
# Load the dataset from the CSV file './911.csv' (path is relative to the
# current working directory) into a pandas DataFrame.
dataFrame = pd.read_csv('./911.csv')
# Print a summary of the DataFrame (columns, non-null counts, dtypes).
dataFrame.info()
# Preview the first rows of the DataFrame (displayed only when this is the
# last expression of a notebook cell).
dataFrame.head()
# Five most frequent zip codes
dataFrame['zip'].value_counts().iloc[:5]
# Six townships (twp) that appear most often in the data
dataFrame['twp'].value_counts().iloc[:6]
# Number of distinct call titles (missing values are not counted)
dataFrame['title'].nunique(dropna=True)
# Derive the general reason for each call: the text before the first ':'
# in the 'title' column.
# The vectorized .str accessor replaces the per-row Python lambda; unlike
# the lambda, it propagates missing titles as NaN instead of raising
# AttributeError on a non-string value.
dataFrame['SpecificReason'] = dataFrame['title'].str.split(':', n=1).str[0]
dataFrame.head()
# Five most common reasons
dataFrame['SpecificReason'].value_counts().head()
# Bar chart of the number of calls per reason
sns.countplot(x='SpecificReason', data=dataFrame)
# Convert the 'timeStamp' column from object dtype to real datetime values.
dataFrame['timeStamp'] = pd.to_datetime(dataFrame['timeStamp'])
dataFrame.info()
# Sanity check: the elements should now be pandas Timestamp objects.
type(dataFrame['timeStamp'].iloc[0])
# Derive three columns from the timestamp: hour of day, month, and day of
# week. The .dt accessor extracts each field for the whole column in one
# vectorized call instead of running a Python lambda per row.
# NOTE: 'Day' holds the day of the WEEK (Monday=0 ... Sunday=6), not the
# day of the month.
dataFrame['Hour'] = dataFrame['timeStamp'].dt.hour
dataFrame['Month'] = dataFrame['timeStamp'].dt.month
dataFrame['Day'] = dataFrame['timeStamp'].dt.dayofweek
# Number of distinct hours present in the data
dataFrame['Hour'].nunique()
# Count of calls per month, with one bar per SpecificReason within each month
sns.countplot(data=dataFrame, x='Month', hue='SpecificReason')
# Aggregate per month: count the non-null entries of every column for each
# value of 'Month'. (The original grouped by 'Day', which contradicted the
# variable name and produced day-of-week counts instead of monthly ones.)
byMonth = dataFrame.groupby('Month').count()
byMonth.head()
# Line plot of the number of calls per month, using the 'twp' count as the
# per-month call count (rows with a missing 'twp' are not counted).
byMonth['twp'].plot()