Python Pandas Pivot Table Index location Percentage calculation on Two columns

In [162]:

1

2

3

4

5

6

import sqlalchemy

import pyodbc

import numpy as np

from pandas import DataFrame

from bokeh.plotting import figure, output_file, show

import pandas as pd

In [163]:

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

df = DataFrame

## connect to SqlServer Database and get information.

try:

import urllib

params = urllib.quote_plus("DRIVER={SQL Server Native Client 11.0};SERVER=.\MSSQLSERVER_ENT;DATABASE=Antrak;Trusted_Connection=yes;")

engine = sqlalchemy.create_engine('mssql+pyodbc:///?odbc_connect=%s"' % params)

resoverall = engine.execute("SELECT * FROM FlightBookingsMain")

df = df(resoverall.fetchall())

df.columns = resoverall.keys()

except (RuntimeError, TypeError, NameError):

print('Error in Conneccting')

print(RuntimeError, TypeError, NameError)

finally:

print("connected")

1

connected

In [164]:

1 2	#lets check the head df.head()

Out[164]:

	First Name	Last Name	Sex	Phone	Fax	Email Address	Booking Date	Departure Date	Arrival Date	Address1	…	Number of Passengers	Total Cost	Currency Numeric Code	Company website	Flight ID Number	Flight Booking ID	Insurance	AFP	Flight Name	Age
0	Ellis	Harris	m	077 5665 3370	077 5665 3373	Ellis.E.Harris@mailinator.com	2012-12-18	2011-07-16	2000-08-23	63 Hull Road	…	2	983	710	http://qvc.com	4111113236285560	1	16	62	ZAR	62
1	Aaron	Field	m	079 6068 9016	079 6068 9015	Aaron.A.Field@mailinator.com	2014-10-28	2005-02-26	2000-07-30	42 Russell Rd	…	9	2452	710	http://msnbc.com	4111110402728960	2	83	38	ZAR	44
2	Archie	Kaur	m	070 3056 9804	070 3056 9809	Archie.M.Kaur@spambob.com	2003-11-17	2000-12-05	2007-03-07	10 Southend Avenue	…	8	1710	710	http://ap.org	4111110879255550	3	54	58	ZAR	60
3	Bradley	Middleton	m	070 0765 7814	070 0765 7816	Bradley.A.Middleton@dodgeit.com	2013-07-18	2011-02-26	2015-10-25	32 Pier Road	…	6	518	710	http://tripadvisor.com	4111111855573320	4	72	43	ZAR	66
4	Evan	Reeves	m	070 8266 4451	070 8266 4458	Evan.A.Reeves@dodgeit.com	2006-07-20	2006-06-20	2001-02-25	75 Moulton Road	…	7	1943	710	http://aliexpress.com	4111113022178850	5	68	52	ZAR	38

5 rows × 25 columns

In [165]:

1 2	#check lenght of data frame len(df)

Out[165]:

1

40000

In [166]:

1

2

3

4

5

6

7

8

# let get a start date we will read our data from

from datetime import datetime

date1 = datetime.strptime('30-01-16', '%d-%m-%y').date()

date2 = datetime.strptime('30-01-17', '%d-%m-%y').date()

date3 = datetime.strptime('01-02-16', '%d-%m-%y').date()

#2016-02-01

date1

#date2

Out[166]:

1	datetime.date(2016, 1, 30)

In [167]:

1

2

3

#lets get the data from 30th January 1999 to 30th January 2018

year_subset = df.loc[(df['Booking Date']> date1) & (df['Booking Date']< date2)]

#year_subset = df.loc[(df['Booking Date']> date1)]

In [168]:

1	year_subset.head()

Out[168]:

	First Name	Last Name	Sex	Phone	Fax	Email Address	Booking Date	Departure Date	Arrival Date	Address1	…	Number of Passengers	Total Cost	Currency Numeric Code	Company website	Flight ID Number	Flight Booking ID	Insurance	AFP	Flight Name	Age
281	Bradley	Hope	m	078 8099 3168	078 8099 3163	Bradley.G.Hope@trashymail.com	2016-01-31	2010-09-11	2012-02-10	36 Chapel Lane	…	5	256	682	http://ups.com	4111111844936880	282	23	69	SAR	47
469	Louise	Shah	f	079 4989 8781	079 4989 8789	Louise.C.Shah@dodgeit.com	2016-01-31	2009-08-02	2001-05-23	68 Seaford Road	…	10	2206	682	http://delta.com	4111116551108990	470	94	71	SAR	40
1037	Oscar	Howell	m	079 0500 3950	079 0500 3956	Oscar.F.Howell@pookmail.com	2016-02-14	2009-09-11	2012-08-20	46 Maidstone Road	…	10	436	96	http://live.com	4111117962000540	1038	96	86	BND	27
1201	Noah	Middleton	m	070 3851 0870	070 3851 0877	Noah.M.Middleton@trashymail.com	2016-02-01	2012-05-08	2009-07-28	78 Station Rd	…	8	493	977	http://badoo.com	4111115779918600	1202	25	79	BAM	61
1204	Joseph	Kirby	m	078 3986 6290	078 3986 6293	Joseph.M.Kirby@dodgeit.com	2016-02-18	2007-02-06	2003-11-03	1 Peachfield Road	…	7	3112	977	http://weather.com	4111114160574520	1205	57	77	BAM	58

5 rows × 25 columns

In [169]:

1

2

3

4

5

#lets change some values in the age column to zero.

#lets change all the Age values on 2nd February 2016 to zero

year_subset.ix[year_subset['Booking Date']==date3,'Age'] =0

In [170]:

1 2	#lets check if we have got some zero in our AGE column year_subset[year_subset['Age']==0].head(3)

Out[170]:

	First Name	Last Name	Sex	Phone	Fax	Email Address	Booking Date	Departure Date	Arrival Date	Address1	…	Number of Passengers	Total Cost	Currency Numeric Code	Company website	Flight ID Number	Flight Booking ID	Insurance	AFP	Flight Name
1201	Noah	Middleton	m	070 3851 0870	070 3851 0877	Noah.M.Middleton@trashymail.com	2016-02-01	2012-05-08	2009-07-28	78 Station Rd	…	8	493	977	http://badoo.com	4111115779918600	1202	25	79	BAM
9519	Leon	Harvey	m	079 3516 7353	079 3516 7352	Leon.S.Harvey@dodgeit.com	2016-02-01	2001-01-18	2007-05-06	95 Southlands Road	…	2	1262	800	http://cbsnews.com	4111116816186750	9520	29	19	UGX
10748	Naomi	Clayton	f	079 7068 7431	079 7068 7431	Naomi.C.Clayton@trashymail.com	2016-02-01	2008-04-01	2002-06-10	88 Grenoble Road	…	6	2111	788	http://stylebistro.com	4111117735828380	10749	61	16	TND

3 rows × 25 columns

In [171]:

1

2

3

4

5

6

7

8

9

10

#lets define a function to calculate the year on year from two columns

def get_change(current, previous):

#if current == previous:

#return 0.0

try:

return (current - previous)/previous*100.0

except ZeroDivisionError:

print('error!!')

return 0

In [172]:

1

2

3

4

##lets get the Age and AFP columns with a sum

gettable = pd.pivot_table(year_subset, index=['Booking Date'], values=['AFP', 'Age'], aggfunc = np.sum)

gettable

Out[172]:

	AFP	Age
Booking Date
2016-01-31	631	357
2016-02-01	343	0
2016-02-02	318	295
2016-02-03	356	287
2016-02-04	152	52
2016-02-05	261	305
2016-02-06	294	213
2016-02-07	373	311
2016-02-08	259	165
2016-02-09	474	429
2016-02-10	612	318
2016-02-11	312	256
2016-02-12	388	296
2016-02-13	431	433
2016-02-14	375	194
2016-02-15	195	283
2016-02-16	423	288
2016-02-17	411	258
2016-02-18	215	180
2016-02-19	166	92
2016-02-20	432	319

In [173]:

1 2	#lets check some data gettable.ix[: , 1:2]

Out[173]:

	Age
Booking Date
2016-01-31	357
2016-02-01	0
2016-02-02	295
2016-02-03	287
2016-02-04	52
2016-02-05	305
2016-02-06	213
2016-02-07	311
2016-02-08	165
2016-02-09	429
2016-02-10	318
2016-02-11	256
2016-02-12	296
2016-02-13	433
2016-02-14	194
2016-02-15	283
2016-02-16	288
2016-02-17	258
2016-02-18	180
2016-02-19	92
2016-02-20	319

In [174]:

1 2	#lets run the function by passing multiple columns get_change(gettable['AFP'], gettable['Age'])

Out[174]:

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

Booking Date

2016-01-31 76.750700

2016-02-01 inf

2016-02-02 7.796610

2016-02-03 24.041812

2016-02-04 192.307692

2016-02-05 -14.426230

2016-02-06 38.028169

2016-02-07 19.935691

2016-02-08 56.969697

2016-02-09 10.489510

2016-02-10 92.452830

2016-02-11 21.875000

2016-02-12 31.081081

2016-02-13 -0.461894

2016-02-14 93.298969

2016-02-15 -31.095406

2016-02-16 46.875000

2016-02-17 59.302326

2016-02-18 19.444444

2016-02-19 80.434783

2016-02-20 35.423197

dtype: float64

In [175]:

1 2	#check head of table gettable.head()

Out[175]:

	AFP	Age
Booking Date
2016-01-31	631	357
2016-02-01	343	0
2016-02-02	318	295
2016-02-03	356	287
2016-02-04	152	52

In [179]:

1

2

3

gettable['YoY %'] = gettable.apply(lambda x: ((x['AFP'] - x['Age'])/x['Age'])*100.0, axis=1)

#lets get the year on year value by defining a lambda function

#gettable['YoY %'] = get_change(gettable['AFP'], gettable['Age'])

In [180]:

1

2

3

4

5

#round the series values to 2 decimal places

#gettable.rstrip('0').rstrip('.')

gettable = np.trim_zeros(gettable.round(2))

gettable

Out[180]:

	AFP	Age	YoY %
Booking Date
2016-01-31	631	357	76.750000
2016-02-01	343	0	inf
2016-02-02	318	295	7.800000
2016-02-03	356	287	24.040000
2016-02-04	152	52	192.310000
2016-02-05	261	305	-14.430000
2016-02-06	294	213	38.030000
2016-02-07	373	311	19.940000
2016-02-08	259	165	56.970000
2016-02-09	474	429	10.490000
2016-02-10	612	318	92.450000
2016-02-11	312	256	21.880000
2016-02-12	388	296	31.080000
2016-02-13	431	433	-0.460000
2016-02-14	375	194	93.300000
2016-02-15	195	283	-31.100000
2016-02-16	423	288	46.880000
2016-02-17	411	258	59.300000
2016-02-18	215	180	19.440000
2016-02-19	166	92	80.430000
2016-02-20	432	319	35.420000

In [178]:

1 2	#run method to get some values get_change(year_subset['AFP'],year_subset['Age'])

Out[178]:

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

281 46.808511

469 77.500000

1037 218.518519

1201 inf

1204 32.758621

1326 94.117647

1706 -64.406780

1785 -13.207547

2860 -79.245283

2881 58.974359

3310 39.130435

4509 37.931034

5222 347.619048

5291 37.500000

5325 -17.307692

5379 97.826087

5435 -51.515152

5760 24.615385

5790 171.875000

5873 -54.901961

5882 138.461538

5931 -33.333333

6297 -14.545455

6805 115.789474

6949 19.402985

7068 -31.578947

7263 69.642857

7351 74.418605

7606 -68.750000

7954 14.925373

...

30804 -78.846154

31222 -72.549020

31353 17.142857

32812 285.714286

33233 -6.666667

33247 inf

33271 5.454545

33284 -52.000000

33439 31.034483

33520 -50.000000

33526 126.315789

33637 37.681159

33639 123.333333

34309 115.789474

34338 inf

34534 2.173913

35273 76.190476

35439 164.000000

36161 -68.518519

36960 -40.909091

37037 15.555556

37444 -33.333333

37614 85.185185

37860 81.818182

38118 170.000000

38176 2.857143

38191 90.697674

38264 inf

38854 14.285714

39404 14.492754

dtype: float64

Python Pandas Pivot Table Index location Percentage calculation on Two columns

How to Install Whl Packages and Modules in Python

Matplotlib Pyplot Plt Python Pandas Data Visualization Plotting

Free Open Source Data Visualization Tools and Service

Java Python R SQL Excel Compared Similarities For Data Science and Data Analytics – The Basics

Python PyQt Gui Basics Button Window PyQt4 Pt1

Getting Python Spyder Anaconda IDE To Work Using Virtualenv

Leave a Reply Cancel reply

DataPandas LTS

EXPLORE DataPandas

ImportAnt link

GET IN TOUCH

© 2024 DataPandas

People Who Read The Above Post Also Read This:

Similar Posts

Leave a Reply Cancel reply

DataPandas LTS

EXPLORE DataPandas

ImportAnt link

GET IN TOUCH

© 2024 DataPandas