import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Read the race results into a DataFrame; the path is resolved relative to
# the current working directory.
data = pd.read_csv(filepath_or_buffer='London_2022_mass_results.csv')

# Preview the first five rows (rendered only when this is the last
# expression of a notebook cell).
data.head(n=5)


# Share of runners per gender, drawn as a pie chart.
gender_count = data['Gender'].value_counts()

# value_counts() orders its result by frequency, so the wedge labels must be
# derived from the index rather than hard-coded; otherwise 'Men'/'Women' are
# swapped whenever women outnumber men, and any extra gender code makes
# plt.pie raise on a label-length mismatch. Unknown codes are shown as-is.
gender_names = {'M': 'Men', 'W': 'Women'}
gender_labels = [gender_names.get(code, code) for code in gender_count.index]

plt.pie(gender_count, labels=gender_labels, autopct='%1.1f%%')
plt.axis('equal')  # equal aspect ratio keeps the pie circular
plt.title('Runners Gender')
plt.show()


# Split "Surname, First (XXX)" into the runner's name and a three-character
# country code. Group 0 is the name, group 1 the code inside the parentheses.
name_parts = data['Name'].str.extract(r'(.+?) \((\w{3})\)')

# Rows that do not match the pattern come back as NaN in both groups.
# Country is left as NaN for those rows, but the original Name is kept
# instead of being overwritten with NaN.
data['Country'] = name_parts[1]
data['Name'] = name_parts[0].fillna(data['Name'])


# Preview the result (rendered only as the last expression of a notebook cell).
data.head()


# Tally the country codes found in the first 100 rows of the table.
# NOTE(review): these are the top 100 finishers only if the CSV is ordered by
# overall place - confirm against the data file.
top100 = data['Country'].head(100).value_counts()
print(top100)

Country
GBR    76
FRA     4
ITA     3
CAN     2
DOM     1
IRL     1
NED     1
GER     1
RSA     1
POL     1
HKG     1
JPN     1
CHA     1
ARG     1
ESP     1
FIN     1
USA     1
BEL     1
NOR     1
Name: count, dtype: int64


# Frequency of each country code across every row, most common first.
country_counts = data.loc[:, 'Country'].value_counts()
print(country_counts)

Country
GBR    29575
USA     2671
ITA      734
IRL      691
FRA      547
       ...  
BAR        1
MON        1
ARM        1
ASA        1
CMR        1
Name: count, Length: 141, dtype: int64


# Number of runners in each age category, with one bar per gender.
sns.countplot(x="Category", hue="Gender", edgecolor='black', data=data)
plt.ylabel('Number of Runners')  # fixed typo: was 'Runnners'
plt.title('Runners in each age category')
plt.show()


# Convert the clock-time strings to elapsed seconds. Both columns go through
# the same timedelta conversion, finish time first and then the half split.
data['Seconds_Finish'], data['Seconds_FirstHalf'] = [
    pd.to_timedelta(data[time_column]).dt.total_seconds()
    for time_column in ('Finish Time', 'Half Time')
]

# The second-half split is whatever remains after the first half.
data['Seconds_SecondHalf'] = data['Seconds_Finish'].sub(data['Seconds_FirstHalf'])


# Box plot of finish time in seconds per age category, split by gender.
plt.figure(figsize=(10, 6))
finish_axes = sns.boxplot(data=data, x='Category', y='Seconds_Finish', hue='Gender')
finish_axes.set_title('Finish Times (Seconds) by Gender and Age Group')
finish_axes.set_xlabel('Age Group Category')
finish_axes.set_ylabel('Seconds')
plt.show()


# Finish times in seconds for each gender code used in the data ('M' / 'W').
male_times = data[data['Gender'] == 'M']['Seconds_Finish']
female_times = data[data['Gender'] == 'W']['Seconds_Finish']

# Use one common set of 20 bins (21 edges) for both histograms. Letting each
# call pick its own bins gives the two groups different edges and widths, so
# the overlaid bar heights cannot be compared. Series.min()/max() skip NaN.
finish_seconds = data['Seconds_Finish']
shared_bins = np.linspace(finish_seconds.min(), finish_seconds.max(), 21)

plt.hist(male_times, bins=shared_bins, alpha=0.5, label='Male')
plt.hist(female_times, bins=shared_bins, alpha=0.5, label='Female')


plt.xlabel('Marathon Time - Seconds')
plt.ylabel('Frequency')
plt.title('Marathon Times by Gender')

plt.legend()

plt.show()


# Plot a random 10% of runners to keep the scatter readable. No random_state
# is set, so the sample differs on every run.
sampled_df = data.sample(frac=0.1)

sns.scatterplot(x='Seconds_FirstHalf', y='Seconds_SecondHalf', hue='Gender', data=sampled_df)

# Reference line y = x (even split): points below it ran the second half
# faster than the first. Draw it from just its two end points. Plotting every
# sampled value in unsorted order retraces the line thousands of times, which
# fills in the dashes so it renders as solid, and breaks the line at any NaN.
split_min = sampled_df['Seconds_FirstHalf'].min()
split_max = sampled_df['Seconds_FirstHalf'].max()
plt.plot([split_min, split_max], [split_min, split_max], color='gray', linestyle='--')

plt.xlabel('First Half - (Seconds)')
plt.ylabel('Second Half - (Seconds)')
plt.title('First Half Vs. Second Half Split Time - (Sample of 10% of Runners)')

plt.show()


# Number of runners whose second half was faster than their first half.
# Rows where either split is NaN compare as False and are not counted.
count = int((data['Seconds_SecondHalf'] < data['Seconds_FirstHalf']).sum())

	Overall Place	Gender Place	Category Place	Name	Club	Runner Number	Gender	Category	Event	Half Time	Finish Time
0	1	1	1	Frith, Thomas (GBR)	Woodford Green AC with Essex Ladies	1252	M	18-39	Mass	1:08:47	02:18:35
1	2	2	2	Hogan, Sean (GBR)	Poole Runners	1259	M	18-39	Mass	1:09:32	02:18:51
2	3	3	3	Morwood, Joe (GBR)	Aldershot Farnham & District	1251	M	18-39	Mass	1:07:41	02:20:33
3	4	4	4	Wilson, Kenny (GBR)	Moray Road Runners	1262	M	18-39	Mass	1:08:47	02:20:40
4	5	5	1	Laybourne, Gary (GBR)	South London Harriers	1261	M	40-44	Mass	1:08:47	02:21:07

	Overall Place	Gender Place	Category Place	Name	Club	Runner Number	Gender	Category	Event	Half Time	Finish Time	Country
0	1	1	1	Frith, Thomas	Woodford Green AC with Essex Ladies	1252	M	18-39	Mass	1:08:47	02:18:35	GBR
1	2	2	2	Hogan, Sean	Poole Runners	1259	M	18-39	Mass	1:09:32	02:18:51	GBR
2	3	3	3	Morwood, Joe	Aldershot Farnham & District	1251	M	18-39	Mass	1:07:41	02:20:33	GBR
3	4	4	4	Wilson, Kenny	Moray Road Runners	1262	M	18-39	Mass	1:08:47	02:20:40	GBR
4	5	5	1	Laybourne, Gary	South London Harriers	1261	M	40-44	Mass	1:08:47	02:21:07	GBR

London Marathon 2022 Analysis

A practice project for displaying various visualizations from a dataset

Import Libraries

Data Loading

The Distribution of Men and Women in the Race

Extracting Name and Country Abbreviation from the data in the Name column.

Finding Country Code/Frequency of the Top 100 Runners

Extending this to view the country codes of all runners

The Distribution of Men and Women in the Race (within each Category)

Converting the 'Half Time' and 'Finish Time' columns into seconds, and finding the time for the second half of the race

Finish Times Analysis

Finish Times Analysis