pivot table for year on year
This is a quick example of how to use pivot_table, to calculate year on year percentage sales .
import sqlalchemy
import pyodbc
import numpy as np
from pandas import DataFrame
from bokeh.plotting import figure, output_file, show
import pandas as pd
df = DataFrame
## connect to SqlServer Database and get information.
try:
import urllib
params = urllib.quote_plus("DRIVER={SQL Server Native Client 11.0};SERVER=.\MSSQLSERVER_ENT;DATABASE=Antrak;Trusted_Connection=yes;")
engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=%s"' % params)
resoverall = engine.execute("SELECT * FROM FlightBookingsMain")
df = df(resoverall.fetchall())
df.columns = resoverall.keys()
except (RuntimeError, TypeError, NameError):
print('Error in Conneccting')
print(RuntimeError, TypeError, NameError)
finally:
print("connected")
#lets check the head
df.head()
#check lenght of data frame
len(df)
# let get a start date we will read our data from
from datetime import datetime
date1 = datetime.strptime('30-01-16', '%d-%m-%y').date()
date2 = datetime.strptime('30-01-17', '%d-%m-%y').date()
date3 = datetime.strptime('01-02-16', '%d-%m-%y').date()
#2016-02-01
date1
#date2
#lets get the data from 30th January 1999 to 30th January 2018
year_subset = df.loc[(df['Booking Date']> date1) & (df['Booking Date']< date2)]
#year_subset = df.loc[(df['Booking Date']> date1)]
year_subset.head()
#lets change some values in the age column to zero.
#lets change all the Age values on 2nd February 2016 to zero
year_subset.ix[year_subset['Booking Date']==date3,'Age'] =0
#lets check if we have got some zero in our AGE column
year_subset[year_subset['Age']==0].head(3)
#lets define a function to calculate the year on year from two columns
def get_change(current, previous):
#if current == previous:
#return 0.0
try:
return (current - previous)/previous*100.0
except ZeroDivisionError:
print('error!!')
return 0
##lets get the Age and AFP columns with a sum
gettable = pd.pivot_table(year_subset, index=['Booking Date'], values=['AFP', 'Age'], aggfunc = np.sum)
gettable
#lets check some data
gettable.ix[: , 1:2]
#lets run the function by passing multiple columns
get_change(gettable['AFP'], gettable['Age'])
#check head of table
gettable.head()
gettable['YoY %'] = gettable.apply(lambda x: ((x['AFP'] - x['Age'])/x['Age'])*100.0, axis=1)
#lets get the year on year value by defining a lambda function
#gettable['YoY %'] = get_change(gettable['AFP'], gettable['Age'])
#round the series values to 2 decimal places
#gettable.rstrip('0').rstrip('.')
gettable = np.trim_zeros(gettable.round(2))
gettable
#run method to get some values
get_change(year_subset['AFP'],year_subset['Age'])