Aggregation – Pandas Numpy Python Series DataFrame
In these quick notes, we will go through aggregation in Python.
These notes are based on the lecture series Learning Python for Data Analysis and Visualization by Jose Portilla on Udemy.
|
1 2 3 |
import numpy as np import pandas as pd from pandas import Series, DataFrame |
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/"
|
1 |
dframe_wine = pd.read_csv('winequality-red.csv', sep=';') |
|
1 |
dframe_wine.head() |
|
1 2 |
#get the average alcohol content for all the wines dframe_wine['alcohol'].mean() |
|
1 2 3 |
# Define a function which returns the difference between the maximum and minimum values.
def max_to_min(arr):
    """Return the range of ``arr``: its maximum value minus its minimum value.

    ``arr`` must expose ``.max()`` and ``.min()`` methods (for example a
    NumPy array or a pandas Series). The result has whatever type the
    subtraction of those two values produces.
    """
    return arr.max() - arr.min()
|
1 2 |
wino = dframe_wine.groupby('quality') wino.describe() |
|
1 2 |
#do an aggregation on the groupby object wino.agg(max_to_min) |
|
1 |
# Sum each column within every quality group. The string alias selects
# pandas' own group-wise sum instead of passing the Python builtin `sum`.
wino.agg('sum')
|
1 |
wino.agg('count') |
|
1 2 |
#create a new column in dataframe as per your requirements dframe_wine['alc / quality ratio'] = dframe_wine['alcohol'] / dframe_wine['quality'] |
|
1 |
dframe_wine |
|
1 2 |
#using pivot table instead of groupby to achieve same results dframe_wine.pivot_table(index='quality') |
|
1 |
dframe_wine.groupby('quality').mean() |
|
1 2 3 |
#lets plot the data on a scatterplot %matplotlib inline dframe_wine.plot(kind='scatter', x='quality', y='alcohol') |


