SQL-MarathonAnalysis

London Marathon 2022 Data
(Download the CSV file used)

  • To See The File:
SELECT * FROM MarathonTable LIMIT 5;  
Overall Place Gender Place Category Place Name Club Runner Number Gender Category Event Half Time Finish Time
1 1 1 Frith, Thomas (GBR) Woodford Green AC with Essex Ladies 1252 M 18-39 Mass 1:08:47 02:18:35
2 2 2 Hogan, Sean (GBR) Poole Runners 1259 M 18-39 Mass 1:09:32 02:18:51
3 3 3 Morwood, Joe (GBR) Aldershot Farnham & District 1251 M 18-39 Mass 1:07:41 02:20:33
4 4 4 Wilson, Kenny (GBR) Moray Road Runners 1262 M 18-39 Mass 1:08:47 02:20:40
5 5 1 Laybourne, Gary (GBR) South London Harriers 1261 M 40-44 Mass 1:08:47 02:21:07
  • Counting The Number of Men and Women in Each Category:
SELECT Gender, Category, Count(*) FROM MarathonTable GROUP BY gender, category ORDER BY gender, category;  
  • The Number of Runner in Each Country:

 /*Note: the name column displays text such as 'Jim Hardy (ENG)' */  

SELECT   
    SUBSTRING(Name, INSTR(Name, '(') + 1, 3) AS country_code,  
    COUNT(*) AS num_runners  
FROM   
    MarathonTable  
GROUP BY   
    SUBSTRING(Name, INSTR(Name, '(') + 1, 3)  
ORDER BY 2 DESC;  
  • Top 3 Runners in Each Category (for Men and Women):
WITH RankedRunners AS (  
    SELECT  
        Gender, Category, Name, "Finish Time",  
        ROW_NUMBER() OVER (PARTITION BY Gender,Category ORDER BY 'Finish Time') AS Rank  
    FROM  
        MarathonTable  
)  

SELECT  
    Gender, Category, Rank, Name, "Finish Time",  
FROM  
    RankedRunners  
WHERE  
    Rank <= 3  
ORDER BY  
    Gender, Category, Rank;  
  • The Average Finish Time BY Gender and Category:
/*Note that AVG(TIME_TO_SEC(  )) is only available in certain versions of SQL, so I converted the Finish Time manually*/  

SELECT  
    Gender, Category,  
    ROUND(AVG(EXTRACT(HOUR FROM "Finish Time") * 3600 + EXTRACT(MINUTE FROM "Finish Time") * 60 + EXTRACT(SECOND FROM "Finish Time")),2) AS AverageFinishTime  

FROM  
    MarathonTable  
GROUP BY  
    Gender, Category;  
  • The Number of Finishers Between 2-3 hours, 3-4 hours, etc:
SELECT   
    SUM(CASE WHEN "Finish Time" >= '2:00:00' AND "Finish Time" < '3:00:00' THEN 1 ELSE 0 END) AS "2-3 hours",  
    SUM(CASE WHEN "Finish Time" >= '3:00:00' AND "Finish Time" < '4:00:00' THEN 1 ELSE 0 END) AS "3-4 hours",  
    SUM(CASE WHEN "Finish Time" >= '4:00:00' AND "Finish Time" < '5:00:00' THEN 1 ELSE 0 END) AS "4-5 hours",  
    SUM(CASE WHEN "Finish Time" >= '5:00:00' AND "Finish Time" < '6:00:00' THEN 1 ELSE 0 END) AS "5-6 hours",  
    SUM(CASE WHEN "Finish Time" >= '6:00:00' AND "Finish Time" < '7:00:00' THEN 1 ELSE 0 END) AS "6-7 hours",  
    SUM(CASE WHEN "Finish Time" >= '7:00:00' AND "Finish Time" < '8:00:00' THEN 1 ELSE 0 END) AS "7-8 hours",  
    SUM(CASE WHEN "Finish Time" >= '8:00:00' THEN 1 ELSE 0 END) AS "8+ hours"  
FROM MarathonTable;  
  • The Number of Finishers With A Negative Split (eg, the 2nd half of the race was faster than the first):
WITH SecondsForRunners AS (  
    SELECT  
        Name,  
        (EXTRACT(HOUR FROM "Finish Time") * 3600 + EXTRACT(MINUTE FROM "Finish Time") * 60 + EXTRACT(SECOND FROM "Finish Time")) AS FinishTime_Seconds,  
        (EXTRACT(HOUR FROM "Half Time") * 3600 + EXTRACT(MINUTE FROM "Half Time") * 60 + EXTRACT(SECOND FROM "Half Time")) AS HalfTime_Seconds  

    FROM  
        MarathonTable  
)  

SELECT COUNT(*)  
FROM SecondsForRunners  
WHERE FinishTime_Seconds - HalfTime_Seconds < HalfTime_Seconds;  
  • Finding The Time difference (in seconds) Between The Fastest And Slowest Runner In Each Category:
/*Note that AVG(TIME_TO_SEC(  )) is only available in certain versions of SQL, so I converted the Finish Time manually*/  

 WITH SecondsForRunners AS (  
    SELECT  
        Gender,  
        Category,  
        ROUND((EXTRACT(HOUR FROM "Finish Time") * 3600 + EXTRACT(MINUTE FROM "Finish Time") * 60 + EXTRACT(SECOND FROM "Finish Time")),2) AS FinishTime_Seconds  

    FROM  
        MarathonTable  
)  

SELECT  
    Gender,Category,  
    (MAX(FinishTime_Seconds) - MIN(FinishTime_Seconds)) AS "Slowest-Fastest Times"  
FROM  
    SecondsForRunners  
GROUP by  
    Gender, Category;