Aggregation – Pandas Numpy Python Series DataFrame

1

2

3

import numpy as np

import pandas as pd

from pandas import Series, DataFrame

In [2]:

1	dframe_wine = pd.read_csv('winequality-red.csv', sep=';')

In [4]:

1	dframe_wine.head()

Out[4]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality
0	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5
1	7.8	0.88	0.00	2.6	0.098	25.0	67.0	0.9968	3.20	0.68	9.8	5
2	7.8	0.76	0.04	2.3	0.092	15.0	54.0	0.9970	3.26	0.65	9.8	5
3	11.2	0.28	0.56	1.9	0.075	17.0	60.0	0.9980	3.16	0.58	9.8	6
4	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5

In [5]:

# Average alcohol content across every wine in the DataFrame.
# (The statement sits on its own line so it is executed rather than being
# swallowed by the comment.)
dframe_wine['alcohol'].mean()

Out[5]:

1	10.422983114446502

In [6]:

1

2

3

# define a function which returns the difference between the maximum and minimum values

def max_to_min(arr):
    """Return the range of ``arr``: its maximum value minus its minimum value.

    Parameters
    ----------
    arr : object exposing ``.max()`` and ``.min()``
        For example a pandas Series or a NumPy array. This notebook passes the
        function to a groupby ``.agg`` call.

    Returns
    -------
    The result of ``arr.max() - arr.min()``.
    """
    # The body must be indented under the ``def``; at column 0 the ``return``
    # is an IndentationError.
    return arr.max() - arr.min()

In [7]:

# Group the wines by their 'quality' value; the grouped object is reused by
# the aggregation cells further down.
wino = dframe_wine.groupby('quality')
# Per-group descriptive statistics for every remaining column.
wino.describe()

Out[7]:

		alcohol	chlorides	citric acid	density	fixed acidity	free sulfur dioxide	pH	residual sugar	sulphates	total sulfur dioxide	volatile acidity
quality
3	count	10.000000	10.000000	10.000000	10.000000	10.000000	10.000000	10.000000	10.000000	10.000000	10.000000	10.000000
	mean	9.955000	0.122500	0.171000	0.997464	8.360000	11.000000	3.398000	2.635000	0.570000	24.900000	0.884500
	std	0.818009	0.066241	0.250664	0.002002	1.770875	9.763879	0.144052	1.401596	0.122020	16.828877	0.331256
	min	8.400000	0.061000	0.000000	0.994710	6.700000	3.000000	3.160000	1.200000	0.400000	9.000000	0.440000
	25%	9.725000	0.079000	0.005000	0.996150	7.150000	5.000000	3.312500	1.875000	0.512500	12.500000	0.647500
	50%	9.925000	0.090500	0.035000	0.997565	7.500000	6.000000	3.390000	2.100000	0.545000	15.000000	0.845000
	75%	10.575000	0.143000	0.327500	0.998770	9.875000	14.500000	3.495000	3.100000	0.615000	42.500000	1.010000
	max	11.000000	0.267000	0.660000	1.000800	11.600000	34.000000	3.630000	5.700000	0.860000	49.000000	1.580000
4	count	53.000000	53.000000	53.000000	53.000000	53.000000	53.000000	53.000000	53.000000	53.000000	53.000000	53.000000
	mean	10.265094	0.090679	0.174151	0.996542	7.779245	12.264151	3.381509	2.694340	0.596415	36.245283	0.693962
	std	0.934776	0.076192	0.201030	0.001575	1.626624	9.025926	0.181441	1.789436	0.239391	27.583374	0.220110
	min	9.000000	0.045000	0.000000	0.993400	4.600000	3.000000	2.740000	1.300000	0.330000	7.000000	0.230000
	25%	9.600000	0.067000	0.030000	0.995650	6.800000	6.000000	3.300000	1.900000	0.490000	14.000000	0.530000
	50%	10.000000	0.080000	0.090000	0.996500	7.500000	11.000000	3.370000	2.100000	0.560000	26.000000	0.670000
	75%	11.000000	0.089000	0.270000	0.997450	8.400000	15.000000	3.500000	2.800000	0.600000	49.000000	0.870000
	max	13.100000	0.610000	1.000000	1.001000	12.500000	41.000000	3.900000	12.900000	2.000000	119.000000	1.130000
5	count	681.000000	681.000000	681.000000	681.000000	681.000000	681.000000	681.000000	681.000000	681.000000	681.000000	681.000000
	mean	9.899706	0.092736	0.243686	0.997104	8.167254	16.983847	3.304949	2.528855	0.620969	56.513950	0.577041
	std	0.736521	0.053707	0.180003	0.001589	1.563988	10.955446	0.150618	1.359753	0.171062	36.993116	0.164801
	min	8.500000	0.039000	0.000000	0.992560	5.000000	3.000000	2.880000	1.200000	0.370000	6.000000	0.180000
	25%	9.400000	0.074000	0.090000	0.996200	7.100000	9.000000	3.200000	1.900000	0.530000	26.000000	0.460000
	50%	9.700000	0.081000	0.230000	0.997000	7.800000	15.000000	3.300000	2.200000	0.580000	47.000000	0.580000
	75%	10.200000	0.094000	0.360000	0.997900	8.900000	23.000000	3.400000	2.600000	0.660000	84.000000	0.670000
	max	14.900000	0.611000	0.790000	1.003150	15.900000	68.000000	3.740000	15.500000	1.980000	155.000000	1.330000
6	count	638.000000	638.000000	638.000000	638.000000	638.000000	638.000000	638.000000	638.000000	638.000000	638.000000	638.000000
	mean	10.629519	0.084956	0.273824	0.996615	8.347179	15.711599	3.318072	2.477194	0.675329	40.869906	0.497484
	std	1.049639	0.039563	0.195108	0.002000	1.797849	9.940911	0.153995	1.441576	0.158650	25.038250	0.160962
	min	8.400000	0.034000	0.000000	0.990070	4.700000	1.000000	2.860000	0.900000	0.400000	6.000000	0.160000
	25%	9.800000	0.068250	0.090000	0.995402	7.000000	8.000000	3.220000	1.900000	0.580000	23.000000	0.380000
	50%	10.500000	0.078000	0.260000	0.996560	7.900000	14.000000	3.320000	2.200000	0.640000	35.000000	0.490000
	75%	11.300000	0.088000	0.430000	0.997893	9.400000	21.000000	3.410000	2.500000	0.750000	54.000000	0.600000
	max	14.000000	0.415000	0.780000	1.003690	14.300000	72.000000	4.010000	15.400000	1.950000	165.000000	1.040000
7	count	199.000000	199.000000	199.000000	199.000000	199.000000	199.000000	199.000000	199.000000	199.000000	199.000000	199.000000
	mean	11.465913	0.076588	0.375176	0.996104	8.872362	14.045226	3.290754	2.720603	0.741256	35.020101	0.403920
	std	0.961933	0.029456	0.194432	0.002176	1.992483	10.175255	0.150101	1.371509	0.135639	33.191206	0.145224
	min	9.200000	0.012000	0.000000	0.990640	4.900000	3.000000	2.920000	1.200000	0.390000	7.000000	0.120000
	25%	10.800000	0.062000	0.305000	0.994765	7.400000	6.000000	3.200000	2.000000	0.650000	17.500000	0.300000
	50%	11.500000	0.073000	0.400000	0.995770	8.800000	11.000000	3.280000	2.300000	0.740000	27.000000	0.370000
	75%	12.100000	0.087000	0.490000	0.997360	10.100000	18.000000	3.380000	2.750000	0.830000	43.000000	0.485000
	max	14.000000	0.358000	0.760000	1.003200	15.600000	54.000000	3.780000	8.900000	1.360000	289.000000	0.915000
8	count	18.000000	18.000000	18.000000	18.000000	18.000000	18.000000	18.000000	18.000000	18.000000	18.000000	18.000000
	mean	12.094444	0.068444	0.391111	0.995212	8.566667	13.277778	3.267222	2.577778	0.767778	33.444444	0.423333
	std	1.224011	0.011678	0.199526	0.002378	2.119656	11.155613	0.200640	1.295038	0.115379	25.433240	0.144914
	min	9.800000	0.044000	0.030000	0.990800	5.000000	3.000000	2.880000	1.400000	0.630000	12.000000	0.260000
	25%	11.325000	0.062000	0.302500	0.994175	7.250000	6.000000	3.162500	1.800000	0.690000	16.000000	0.335000
	50%	12.150000	0.070500	0.420000	0.994940	8.250000	7.500000	3.230000	2.100000	0.740000	21.500000	0.370000
	75%	12.875000	0.075500	0.530000	0.997200	10.225000	16.500000	3.350000	2.600000	0.820000	43.000000	0.472500
	max	14.000000	0.086000	0.720000	0.998800	12.600000	42.000000	3.720000	6.400000	1.100000	88.000000	0.850000

In [8]:

# Aggregate each quality group with the custom max_to_min function
# (max minus min of every column within the group).
wino.agg(max_to_min)

Out[8]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol
quality
3	4.9	1.140	0.66	4.5	0.206	31.0	40.0	0.00609	0.47	0.46	2.6
4	7.9	0.900	1.00	11.6	0.565	38.0	112.0	0.00760	1.16	1.67	4.1
5	10.9	1.150	0.79	14.3	0.572	65.0	149.0	0.01059	0.86	1.61	6.4
6	9.6	0.880	0.78	14.5	0.381	71.0	159.0	0.01362	1.15	1.55	5.6
7	10.7	0.795	0.76	7.7	0.346	51.0	282.0	0.01256	0.86	0.97	4.8
8	7.6	0.590	0.69	5.0	0.042	39.0	76.0	0.00800	0.84	0.47	4.2

In [16]:

# Column sums per quality group. The aggregation is named by string, like the
# wino.agg('count') cell, because passing the Python builtin `sum` to a
# groupby aggregation is deprecated in recent pandas releases.
wino.agg('sum')

Out[16]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol
quality
3	83.6	8.845	1.71	26.35	1.225	110.0	249.0	9.97464	33.98	5.70	99.550000
4	412.3	36.780	9.23	142.80	4.806	650.0	1921.0	52.81675	179.22	31.61	544.050000
5	5561.9	392.965	165.95	1722.15	63.153	11566.0	38486.0	679.02757	2250.67	422.88	6741.700000
6	5325.5	317.395	174.70	1580.45	54.202	10024.0	26075.0	635.84041	2116.93	430.86	6781.633333
7	1765.6	80.380	74.66	541.40	15.241	2795.0	6969.0	198.22475	654.86	147.51	2281.716667
8	154.2	7.620	7.04	46.40	1.232	239.0	602.0	17.91382	58.81	13.82	217.700000

In [14]:

1	wino.agg('count')

Out[14]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol
quality
3	10	10	10	10	10	10	10	10	10	10	10
4	53	53	53	53	53	53	53	53	53	53	53
5	681	681	681	681	681	681	681	681	681	681	681
6	638	638	638	638	638	638	638	638	638	638	638
7	199	199	199	199	199	199	199	199	199	199	199
8	18	18	18	18	18	18	18	18	18	18	18

In [17]:

# Add a derived column: alcohol divided by quality, computed row by row.
# NOTE: this mutates dframe_wine in place; the pivot_table / groupby cells
# below show the new column in their output.
dframe_wine['alc / quality ratio'] = dframe_wine['alcohol'] / dframe_wine['quality']

In [18]:

1	dframe_wine

Out[18]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality	alc / quality ratio
0	7.4	0.700	0.00	1.9	0.076	11.0	34.0	0.99780	3.51	0.56	9.4	5	1.880000
1	7.8	0.880	0.00	2.6	0.098	25.0	67.0	0.99680	3.20	0.68	9.8	5	1.960000
2	7.8	0.760	0.04	2.3	0.092	15.0	54.0	0.99700	3.26	0.65	9.8	5	1.960000
3	11.2	0.280	0.56	1.9	0.075	17.0	60.0	0.99800	3.16	0.58	9.8	6	1.633333
4	7.4	0.700	0.00	1.9	0.076	11.0	34.0	0.99780	3.51	0.56	9.4	5	1.880000
5	7.4	0.660	0.00	1.8	0.075	13.0	40.0	0.99780	3.51	0.56	9.4	5	1.880000
6	7.9	0.600	0.06	1.6	0.069	15.0	59.0	0.99640	3.30	0.46	9.4	5	1.880000
7	7.3	0.650	0.00	1.2	0.065	15.0	21.0	0.99460	3.39	0.47	10.0	7	1.428571
8	7.8	0.580	0.02	2.0	0.073	9.0	18.0	0.99680	3.36	0.57	9.5	7	1.357143
9	7.5	0.500	0.36	6.1	0.071	17.0	102.0	0.99780	3.35	0.80	10.5	5	2.100000
10	6.7	0.580	0.08	1.8	0.097	15.0	65.0	0.99590	3.28	0.54	9.2	5	1.840000
11	7.5	0.500	0.36	6.1	0.071	17.0	102.0	0.99780	3.35	0.80	10.5	5	2.100000
12	5.6	0.615	0.00	1.6	0.089	16.0	59.0	0.99430	3.58	0.52	9.9	5	1.980000
13	7.8	0.610	0.29	1.6	0.114	9.0	29.0	0.99740	3.26	1.56	9.1	5	1.820000
14	8.9	0.620	0.18	3.8	0.176	52.0	145.0	0.99860	3.16	0.88	9.2	5	1.840000
15	8.9	0.620	0.19	3.9	0.170	51.0	148.0	0.99860	3.17	0.93	9.2	5	1.840000
16	8.5	0.280	0.56	1.8	0.092	35.0	103.0	0.99690	3.30	0.75	10.5	7	1.500000
17	8.1	0.560	0.28	1.7	0.368	16.0	56.0	0.99680	3.11	1.28	9.3	5	1.860000
18	7.4	0.590	0.08	4.4	0.086	6.0	29.0	0.99740	3.38	0.50	9.0	4	2.250000
19	7.9	0.320	0.51	1.8	0.341	17.0	56.0	0.99690	3.04	1.08	9.2	6	1.533333
20	8.9	0.220	0.48	1.8	0.077	29.0	60.0	0.99680	3.39	0.53	9.4	6	1.566667
21	7.6	0.390	0.31	2.3	0.082	23.0	71.0	0.99820	3.52	0.65	9.7	5	1.940000
22	7.9	0.430	0.21	1.6	0.106	10.0	37.0	0.99660	3.17	0.91	9.5	5	1.900000
23	8.5	0.490	0.11	2.3	0.084	9.0	67.0	0.99680	3.17	0.53	9.4	5	1.880000
24	6.9	0.400	0.14	2.4	0.085	21.0	40.0	0.99680	3.43	0.63	9.7	6	1.616667
25	6.3	0.390	0.16	1.4	0.080	11.0	23.0	0.99550	3.34	0.56	9.3	5	1.860000
26	7.6	0.410	0.24	1.8	0.080	4.0	11.0	0.99620	3.28	0.59	9.5	5	1.900000
27	7.9	0.430	0.21	1.6	0.106	10.0	37.0	0.99660	3.17	0.91	9.5	5	1.900000
28	7.1	0.710	0.00	1.9	0.080	14.0	35.0	0.99720	3.47	0.55	9.4	5	1.880000
29	7.8	0.645	0.00	2.0	0.082	8.0	16.0	0.99640	3.38	0.59	9.8	6	1.633333
…	…	…	…	…	…	…	…	…	…	…	…	…	…
1569	6.2	0.510	0.14	1.9	0.056	15.0	34.0	0.99396	3.48	0.57	11.5	6	1.916667
1570	6.4	0.360	0.53	2.2	0.230	19.0	35.0	0.99340	3.37	0.93	12.4	6	2.066667
1571	6.4	0.380	0.14	2.2	0.038	15.0	25.0	0.99514	3.44	0.65	11.1	6	1.850000
1572	7.3	0.690	0.32	2.2	0.069	35.0	104.0	0.99632	3.33	0.51	9.5	5	1.900000
1573	6.0	0.580	0.20	2.4	0.075	15.0	50.0	0.99467	3.58	0.67	12.5	6	2.083333
1574	5.6	0.310	0.78	13.9	0.074	23.0	92.0	0.99677	3.39	0.48	10.5	6	1.750000
1575	7.5	0.520	0.40	2.2	0.060	12.0	20.0	0.99474	3.26	0.64	11.8	6	1.966667
1576	8.0	0.300	0.63	1.6	0.081	16.0	29.0	0.99588	3.30	0.78	10.8	6	1.800000
1577	6.2	0.700	0.15	5.1	0.076	13.0	27.0	0.99622	3.54	0.60	11.9	6	1.983333
1578	6.8	0.670	0.15	1.8	0.118	13.0	20.0	0.99540	3.42	0.67	11.3	6	1.883333
1579	6.2	0.560	0.09	1.7	0.053	24.0	32.0	0.99402	3.54	0.60	11.3	5	2.260000
1580	7.4	0.350	0.33	2.4	0.068	9.0	26.0	0.99470	3.36	0.60	11.9	6	1.983333
1581	6.2	0.560	0.09	1.7	0.053	24.0	32.0	0.99402	3.54	0.60	11.3	5	2.260000
1582	6.1	0.715	0.10	2.6	0.053	13.0	27.0	0.99362	3.57	0.50	11.9	5	2.380000
1583	6.2	0.460	0.29	2.1	0.074	32.0	98.0	0.99578	3.33	0.62	9.8	5	1.960000
1584	6.7	0.320	0.44	2.4	0.061	24.0	34.0	0.99484	3.29	0.80	11.6	7	1.657143
1585	7.2	0.390	0.44	2.6	0.066	22.0	48.0	0.99494	3.30	0.84	11.5	6	1.916667
1586	7.5	0.310	0.41	2.4	0.065	34.0	60.0	0.99492	3.34	0.85	11.4	6	1.900000
1587	5.8	0.610	0.11	1.8	0.066	18.0	28.0	0.99483	3.55	0.66	10.9	6	1.816667
1588	7.2	0.660	0.33	2.5	0.068	34.0	102.0	0.99414	3.27	0.78	12.8	6	2.133333
1589	6.6	0.725	0.20	7.8	0.073	29.0	79.0	0.99770	3.29	0.54	9.2	5	1.840000
1590	6.3	0.550	0.15	1.8	0.077	26.0	35.0	0.99314	3.32	0.82	11.6	6	1.933333
1591	5.4	0.740	0.09	1.7	0.089	16.0	26.0	0.99402	3.67	0.56	11.6	6	1.933333
1592	6.3	0.510	0.13	2.3	0.076	29.0	40.0	0.99574	3.42	0.75	11.0	6	1.833333
1593	6.8	0.620	0.08	1.9	0.068	28.0	38.0	0.99651	3.42	0.82	9.5	6	1.583333
1594	6.2	0.600	0.08	2.0	0.090	32.0	44.0	0.99490	3.45	0.58	10.5	5	2.100000
1595	5.9	0.550	0.10	2.2	0.062	39.0	51.0	0.99512	3.52	0.76	11.2	6	1.866667
1596	6.3	0.510	0.13	2.3	0.076	29.0	40.0	0.99574	3.42	0.75	11.0	6	1.833333
1597	5.9	0.645	0.12	2.0	0.075	32.0	44.0	0.99547	3.57	0.71	10.2	5	2.040000
1598	6.0	0.310	0.47	3.6	0.067	18.0	42.0	0.99549	3.39	0.66	11.0	6	1.833333

1599 rows × 13 columns

In [20]:

# A pivot table indexed on 'quality' gives the same per-group means as
# dframe_wine.groupby('quality').mean() (pivot_table aggregates with the mean
# by default); only the column order differs.
dframe_wine.pivot_table(index='quality')

Out[20]:

	alc / quality ratio	alcohol	chlorides	citric acid	density	fixed acidity	free sulfur dioxide	pH	residual sugar	sulphates	total sulfur dioxide	volatile acidity
quality
3	3.318333	9.955000	0.122500	0.171000	0.997464	8.360000	11.000000	3.398000	2.635000	0.570000	24.900000	0.884500
4	2.566274	10.265094	0.090679	0.174151	0.996542	7.779245	12.264151	3.381509	2.694340	0.596415	36.245283	0.693962
5	1.979941	9.899706	0.092736	0.243686	0.997104	8.167254	16.983847	3.304949	2.528855	0.620969	56.513950	0.577041
6	1.771587	10.629519	0.084956	0.273824	0.996615	8.347179	15.711599	3.318072	2.477194	0.675329	40.869906	0.497484
7	1.637988	11.465913	0.076588	0.375176	0.996104	8.872362	14.045226	3.290754	2.720603	0.741256	35.020101	0.403920
8	1.511806	12.094444	0.068444	0.391111	0.995212	8.566667	13.277778	3.267222	2.577778	0.767778	33.444444	0.423333

In [22]:

1	dframe_wine.groupby('quality').mean()

Out[22]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	alc / quality ratio
quality
3	8.360000	0.884500	0.171000	2.635000	0.122500	11.000000	24.900000	0.997464	3.398000	0.570000	9.955000	3.318333
4	7.779245	0.693962	0.174151	2.694340	0.090679	12.264151	36.245283	0.996542	3.381509	0.596415	10.265094	2.566274
5	8.167254	0.577041	0.243686	2.528855	0.092736	16.983847	56.513950	0.997104	3.304949	0.620969	9.899706	1.979941
6	8.347179	0.497484	0.273824	2.477194	0.084956	15.711599	40.869906	0.996615	3.318072	0.675329	10.629519	1.771587
7	8.872362	0.403920	0.375176	2.720603	0.076588	14.045226	35.020101	0.996104	3.290754	0.741256	11.465913	1.637988
8	8.566667	0.423333	0.391111	2.577778	0.068444	13.277778	33.444444	0.995212	3.267222	0.767778	12.094444	1.511806

In [24]:

1

2

3

# let's plot the data on a scatter plot

%matplotlib inline

dframe_wine.plot(kind='scatter', x='quality', y='alcohol')

Out[24]:

1	<matplotlib.axes._subplots.AxesSubplot at 0x1a71c4c93c8>

Aggregation – Pandas Numpy Python Series DataFrame

Exploring Statistical Analysis in the Context of Correlation – Testing a Potential Moderator

Running a Random Forest – Data Analysis and Interpretation

Association Of The Literacy Rate And Life Expectancy & Association Of The Literacy Rate And Income Per Person: The Case of Ghana

How to Install Whl Packages and Modules in Python

Python Pandas Pivot Table Index location Percentage calculation on Two columns – XlsxWriter pt2

Save Multiple Pandas DataFrames to One Single Excel Sheet Side by Side or Downwards – XlsxWriter

Leave a Reply Cancel reply

DataPandas LTS

EXPLORE DataPandas

Important link

GET IN TOUCH

© 2026 DataPandas

Similar Posts

Leave a Reply Cancel reply

DataPandas LTS

EXPLORE DataPandas

Important link

GET IN TOUCH

© 2026 DataPandas