pivot table for year on year

This is a quick example of how to use pivot_table,  to calculate year on year percentage sales .

In [162]:
import sqlalchemy
import pyodbc
import numpy as np
from pandas import DataFrame
from bokeh.plotting import figure, output_file, show
import pandas as pd

 

In [163]:
df = DataFrame


## connect to SqlServer Database and get information. 
try:

    import urllib
    params = urllib.quote_plus("DRIVER={SQL Server Native Client 11.0};SERVER=.\MSSQLSERVER_ENT;DATABASE=Antrak;Trusted_Connection=yes;")

    engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=%s"' % params)
    resoverall = engine.execute("SELECT * FROM FlightBookingsMain")


    df = df(resoverall.fetchall())
    df.columns = resoverall.keys()

    
except (RuntimeError, TypeError, NameError):
    print('Error in Conneccting')
    print(RuntimeError, TypeError, NameError)
finally:
    print("connected")

 

connected
In [164]:
#lets check the head
df.head()

 

Out[164]:
First Name Last Name Sex Phone Fax Email Address Booking Date Departure Date Arrival Date Address1 Number of Passengers Total Cost Currency Numeric Code Company website Flight ID Number Flight Booking ID Insurance AFP Flight Name Age
0 Ellis Harris m 077 5665 3370 077 5665 3373 Ellis.E.Harris@mailinator.com 2012-12-18 2011-07-16 2000-08-23 63 Hull Road 2 983 710 http://qvc.com 4111113236285560 1 16 62 ZAR 62
1 Aaron Field m 079 6068 9016 079 6068 9015 Aaron.A.Field@mailinator.com 2014-10-28 2005-02-26 2000-07-30 42 Russell Rd 9 2452 710 http://msnbc.com 4111110402728960 2 83 38 ZAR 44
2 Archie Kaur m 070 3056 9804 070 3056 9809 Archie.M.Kaur@spambob.com 2003-11-17 2000-12-05 2007-03-07 10 Southend Avenue 8 1710 710 http://ap.org 4111110879255550 3 54 58 ZAR 60
3 Bradley Middleton m 070 0765 7814 070 0765 7816 Bradley.A.Middleton@dodgeit.com 2013-07-18 2011-02-26 2015-10-25 32 Pier Road 6 518 710 http://tripadvisor.com 4111111855573320 4 72 43 ZAR 66
4 Evan Reeves m 070 8266 4451 070 8266 4458 Evan.A.Reeves@dodgeit.com 2006-07-20 2006-06-20 2001-02-25 75 Moulton Road 7 1943 710 http://aliexpress.com 4111113022178850 5 68 52 ZAR 38

5 rows × 25 columns

In [165]:
#check lenght of data frame
len(df)

 

Out[165]:
40000
In [166]:
# let get a start date we will read our data from 
from datetime import datetime
date1 = datetime.strptime('30-01-16', '%d-%m-%y').date()
date2 = datetime.strptime('30-01-17', '%d-%m-%y').date()
date3 = datetime.strptime('01-02-16', '%d-%m-%y').date()
#2016-02-01
date1
#date2

 

Out[166]:
datetime.date(2016, 1, 30)
In [167]:
#lets get the data from 30th January 1999 to 30th January 2018
year_subset = df.loc[(df['Booking Date']> date1) & (df['Booking Date']< date2)]
#year_subset = df.loc[(df['Booking Date']> date1)]

 

In [168]:
year_subset.head()

 

Out[168]:
First Name Last Name Sex Phone Fax Email Address Booking Date Departure Date Arrival Date Address1 Number of Passengers Total Cost Currency Numeric Code Company website Flight ID Number Flight Booking ID Insurance AFP Flight Name Age
281 Bradley Hope m 078 8099 3168 078 8099 3163 Bradley.G.Hope@trashymail.com 2016-01-31 2010-09-11 2012-02-10 36 Chapel Lane 5 256 682 http://ups.com 4111111844936880 282 23 69 SAR 47
469 Louise Shah f 079 4989 8781 079 4989 8789 Louise.C.Shah@dodgeit.com 2016-01-31 2009-08-02 2001-05-23 68 Seaford Road 10 2206 682 http://delta.com 4111116551108990 470 94 71 SAR 40
1037 Oscar Howell m 079 0500 3950 079 0500 3956 Oscar.F.Howell@pookmail.com 2016-02-14 2009-09-11 2012-08-20 46 Maidstone Road 10 436 96 http://live.com 4111117962000540 1038 96 86 BND 27
1201 Noah Middleton m 070 3851 0870 070 3851 0877 Noah.M.Middleton@trashymail.com 2016-02-01 2012-05-08 2009-07-28 78 Station Rd 8 493 977 http://badoo.com 4111115779918600 1202 25 79 BAM 61
1204 Joseph Kirby m 078 3986 6290 078 3986 6293 Joseph.M.Kirby@dodgeit.com 2016-02-18 2007-02-06 2003-11-03 1 Peachfield Road 7 3112 977 http://weather.com 4111114160574520 1205 57 77 BAM 58

5 rows × 25 columns

In [169]:
#lets change some values in the age column to zero.

#lets change all the Age values on 2nd February 2016 to zero

year_subset.ix[year_subset['Booking Date']==date3,'Age'] =0

 

In [170]:
#lets check if we have got some zero in our AGE column
year_subset[year_subset['Age']==0].head(3)

 

Out[170]:
First Name Last Name Sex Phone Fax Email Address Booking Date Departure Date Arrival Date Address1 Number of Passengers Total Cost Currency Numeric Code Company website Flight ID Number Flight Booking ID Insurance AFP Flight Name Age
1201 Noah Middleton m 070 3851 0870 070 3851 0877 Noah.M.Middleton@trashymail.com 2016-02-01 2012-05-08 2009-07-28 78 Station Rd 8 493 977 http://badoo.com 4111115779918600 1202 25 79 BAM 0
9519 Leon Harvey m 079 3516 7353 079 3516 7352 Leon.S.Harvey@dodgeit.com 2016-02-01 2001-01-18 2007-05-06 95 Southlands Road 2 1262 800 http://cbsnews.com 4111116816186750 9520 29 19 UGX 0
10748 Naomi Clayton f 079 7068 7431 079 7068 7431 Naomi.C.Clayton@trashymail.com 2016-02-01 2008-04-01 2002-06-10 88 Grenoble Road 6 2111 788 http://stylebistro.com 4111117735828380 10749 61 16 TND 0

3 rows × 25 columns

In [171]:
#lets define a function to calculate the year on year from two columns

def get_change(current, previous):
    #if current == previous:
       #return 0.0
    try:
       return (current - previous)/previous*100.0
    except ZeroDivisionError:
        print('error!!')
        return 0

 

In [172]:
##lets get the Age and AFP columns with a sum

gettable = pd.pivot_table(year_subset, index=['Booking Date'], values=['AFP', 'Age'], aggfunc = np.sum)
gettable

 

Out[172]:
AFP Age
Booking Date
2016-01-31 631 357
2016-02-01 343 0
2016-02-02 318 295
2016-02-03 356 287
2016-02-04 152 52
2016-02-05 261 305
2016-02-06 294 213
2016-02-07 373 311
2016-02-08 259 165
2016-02-09 474 429
2016-02-10 612 318
2016-02-11 312 256
2016-02-12 388 296
2016-02-13 431 433
2016-02-14 375 194
2016-02-15 195 283
2016-02-16 423 288
2016-02-17 411 258
2016-02-18 215 180
2016-02-19 166 92
2016-02-20 432 319
In [173]:
#lets check some data
gettable.ix[: , 1:2]

 

Out[173]:
Age
Booking Date
2016-01-31 357
2016-02-01 0
2016-02-02 295
2016-02-03 287
2016-02-04 52
2016-02-05 305
2016-02-06 213
2016-02-07 311
2016-02-08 165
2016-02-09 429
2016-02-10 318
2016-02-11 256
2016-02-12 296
2016-02-13 433
2016-02-14 194
2016-02-15 283
2016-02-16 288
2016-02-17 258
2016-02-18 180
2016-02-19 92
2016-02-20 319
In [174]:
#lets run the function by passing multiple columns
get_change(gettable['AFP'], gettable['Age'])

 

Out[174]:
Booking Date
2016-01-31     76.750700
2016-02-01           inf
2016-02-02      7.796610
2016-02-03     24.041812
2016-02-04    192.307692
2016-02-05    -14.426230
2016-02-06     38.028169
2016-02-07     19.935691
2016-02-08     56.969697
2016-02-09     10.489510
2016-02-10     92.452830
2016-02-11     21.875000
2016-02-12     31.081081
2016-02-13     -0.461894
2016-02-14     93.298969
2016-02-15    -31.095406
2016-02-16     46.875000
2016-02-17     59.302326
2016-02-18     19.444444
2016-02-19     80.434783
2016-02-20     35.423197
dtype: float64
In [175]:
#check head of table
gettable.head()

 

Out[175]:
AFP Age
Booking Date
2016-01-31 631 357
2016-02-01 343 0
2016-02-02 318 295
2016-02-03 356 287
2016-02-04 152 52
In [179]:
gettable['YoY %'] = gettable.apply(lambda x:  ((x['AFP'] - x['Age'])/x['Age'])*100.0, axis=1)
#lets get the year on year value by defining a lambda function
#gettable['YoY %'] = get_change(gettable['AFP'], gettable['Age'])

 

In [180]:
#round the series values to 2 decimal places
#gettable.rstrip('0').rstrip('.')

gettable  = np.trim_zeros(gettable.round(2))
gettable

 

Out[180]:
AFP Age YoY %
Booking Date
2016-01-31 631 357 76.750000
2016-02-01 343 0 inf
2016-02-02 318 295 7.800000
2016-02-03 356 287 24.040000
2016-02-04 152 52 192.310000
2016-02-05 261 305 -14.430000
2016-02-06 294 213 38.030000
2016-02-07 373 311 19.940000
2016-02-08 259 165 56.970000
2016-02-09 474 429 10.490000
2016-02-10 612 318 92.450000
2016-02-11 312 256 21.880000
2016-02-12 388 296 31.080000
2016-02-13 431 433 -0.460000
2016-02-14 375 194 93.300000
2016-02-15 195 283 -31.100000
2016-02-16 423 288 46.880000
2016-02-17 411 258 59.300000
2016-02-18 215 180 19.440000
2016-02-19 166 92 80.430000
2016-02-20 432 319 35.420000
In [178]:
#run method to get some values
get_change(year_subset['AFP'],year_subset['Age'])

 

Out[178]:
281       46.808511
469       77.500000
1037     218.518519
1201            inf
1204      32.758621
1326      94.117647
1706     -64.406780
1785     -13.207547
2860     -79.245283
2881      58.974359
3310      39.130435
4509      37.931034
5222     347.619048
5291      37.500000
5325     -17.307692
5379      97.826087
5435     -51.515152
5760      24.615385
5790     171.875000
5873     -54.901961
5882     138.461538
5931     -33.333333
6297     -14.545455
6805     115.789474
6949      19.402985
7068     -31.578947
7263      69.642857
7351      74.418605
7606     -68.750000
7954      14.925373
            ...    
30804    -78.846154
31222    -72.549020
31353     17.142857
32812    285.714286
33233     -6.666667
33247           inf
33271      5.454545
33284    -52.000000
33439     31.034483
33520    -50.000000
33526    126.315789
33637     37.681159
33639    123.333333
34309    115.789474
34338           inf
34534      2.173913
35273     76.190476
35439    164.000000
36161    -68.518519
36960    -40.909091
37037     15.555556
37444    -33.333333
37614     85.185185
37860     81.818182
38118    170.000000
38176      2.857143
38191     90.697674
38264           inf
38854     14.285714
39404     14.492754
dtype: float64

 

Similar Posts

Leave a Reply

Your email address will not be published. Required fields are marked *