London Marathon 2022 Data
(Download the CSV file used)
- To See The File:
SELECT * FROM MarathonTable LIMIT 5;
Overall Place | Gender Place | Category Place | Name | Club | Runner Number | Gender | Category | Event | Half Time | Finish Time |
---|---|---|---|---|---|---|---|---|---|---|
1 | 1 | 1 | Frith, Thomas (GBR) | Woodford Green AC with Essex Ladies | 1252 | M | 18-39 | Mass | 1:08:47 | 02:18:35 |
2 | 2 | 2 | Hogan, Sean (GBR) | Poole Runners | 1259 | M | 18-39 | Mass | 1:09:32 | 02:18:51 |
3 | 3 | 3 | Morwood, Joe (GBR) | Aldershot Farnham & District | 1251 | M | 18-39 | Mass | 1:07:41 | 02:20:33 |
4 | 4 | 4 | Wilson, Kenny (GBR) | Moray Road Runners | 1262 | M | 18-39 | Mass | 1:08:47 | 02:20:40 |
5 | 5 | 1 | Laybourne, Gary (GBR) | South London Harriers | 1261 | M | 40-44 | Mass | 1:08:47 | 02:21:07 |
- Counting The Number of Men and Women in Each Category:
SELECT Gender, Category, Count(*) FROM MarathonTable GROUP BY gender, category ORDER BY gender, category;
- The Number of Runner in Each Country:
/*Note: the name column displays text such as 'Jim Hardy (ENG)' */
SELECT
SUBSTRING(Name, INSTR(Name, '(') + 1, 3) AS country_code,
COUNT(*) AS num_runners
FROM
MarathonTable
GROUP BY
SUBSTRING(Name, INSTR(Name, '(') + 1, 3)
ORDER BY 2 DESC;
- Top 3 Runners in Each Category (for Men and Women):
WITH RankedRunners AS (
SELECT
Gender, Category, Name, "Finish Time",
ROW_NUMBER() OVER (PARTITION BY Gender,Category ORDER BY 'Finish Time') AS Rank
FROM
MarathonTable
)
SELECT
Gender, Category, Rank, Name, "Finish Time",
FROM
RankedRunners
WHERE
Rank <= 3
ORDER BY
Gender, Category, Rank;
- The Average Finish Time BY Gender and Category:
/*Note that AVG(TIME_TO_SEC( )) is only available in certain versions of SQL, so I converted the Finish Time manually*/
SELECT
Gender, Category,
ROUND(AVG(EXTRACT(HOUR FROM "Finish Time") * 3600 + EXTRACT(MINUTE FROM "Finish Time") * 60 + EXTRACT(SECOND FROM "Finish Time")),2) AS AverageFinishTime
FROM
MarathonTable
GROUP BY
Gender, Category;
- The Number of Finishers Between 2-3 hours, 3-4 hours, etc:
SELECT
SUM(CASE WHEN "Finish Time" >= '2:00:00' AND "Finish Time" < '3:00:00' THEN 1 ELSE 0 END) AS "2-3 hours",
SUM(CASE WHEN "Finish Time" >= '3:00:00' AND "Finish Time" < '4:00:00' THEN 1 ELSE 0 END) AS "3-4 hours",
SUM(CASE WHEN "Finish Time" >= '4:00:00' AND "Finish Time" < '5:00:00' THEN 1 ELSE 0 END) AS "4-5 hours",
SUM(CASE WHEN "Finish Time" >= '5:00:00' AND "Finish Time" < '6:00:00' THEN 1 ELSE 0 END) AS "5-6 hours",
SUM(CASE WHEN "Finish Time" >= '6:00:00' AND "Finish Time" < '7:00:00' THEN 1 ELSE 0 END) AS "6-7 hours",
SUM(CASE WHEN "Finish Time" >= '7:00:00' AND "Finish Time" < '8:00:00' THEN 1 ELSE 0 END) AS "7-8 hours",
SUM(CASE WHEN "Finish Time" >= '8:00:00' THEN 1 ELSE 0 END) AS "8+ hours"
FROM MarathonTable;
- The Number of Finishers With A Negative Split (eg, the 2nd half of the race was faster than the first):
WITH SecondsForRunners AS (
SELECT
Name,
(EXTRACT(HOUR FROM "Finish Time") * 3600 + EXTRACT(MINUTE FROM "Finish Time") * 60 + EXTRACT(SECOND FROM "Finish Time")) AS FinishTime_Seconds,
(EXTRACT(HOUR FROM "Half Time") * 3600 + EXTRACT(MINUTE FROM "Half Time") * 60 + EXTRACT(SECOND FROM "Half Time")) AS HalfTime_Seconds
FROM
MarathonTable
)
SELECT COUNT(*)
FROM SecondsForRunners
WHERE FinishTime_Seconds - HalfTime_Seconds < HalfTime_Seconds;
- Finding The Time difference (in seconds) Between The Fastest And Slowest Runner In Each Category:
/*Note that AVG(TIME_TO_SEC( )) is only available in certain versions of SQL, so I converted the Finish Time manually*/
WITH SecondsForRunners AS (
SELECT
Gender,
Category,
ROUND((EXTRACT(HOUR FROM "Finish Time") * 3600 + EXTRACT(MINUTE FROM "Finish Time") * 60 + EXTRACT(SECOND FROM "Finish Time")),2) AS FinishTime_Seconds
FROM
MarathonTable
)
SELECT
Gender,Category,
(MAX(FinishTime_Seconds) - MIN(FinishTime_Seconds)) AS "Slowest-Fastest Times"
FROM
SecondsForRunners
GROUP by
Gender, Category;