import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# Read the car dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='CarDataset.csv')
# Preview the first five rows; kept as the trailing expression so the
# notebook renders it as the cell output.
df.head(n=5)
| | Make | Model | Year | Engine Fuel Type | Engine HP | Engine Cylinders | Transmission Type | Driven_Wheels | Number of Doors | Market Category | Vehicle Size | Vehicle Style | highway MPG | city mpg | Popularity | MSRP |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | BMW | 1 Series M | 2011 | premium unleaded (required) | 335.0 | 6.0 | MANUAL | rear wheel drive | 2.0 | Factory Tuner,Luxury,High-Performance | Compact | Coupe | 26 | 19 | 3916 | 46135 |
1 | BMW | 1 Series | 2011 | premium unleaded (required) | 300.0 | 6.0 | MANUAL | rear wheel drive | 2.0 | Luxury,Performance | Compact | Convertible | 28 | 19 | 3916 | 40650 |
2 | BMW | 1 Series | 2011 | premium unleaded (required) | 300.0 | 6.0 | MANUAL | rear wheel drive | 2.0 | Luxury,High-Performance | Compact | Coupe | 28 | 20 | 3916 | 36350 |
3 | BMW | 1 Series | 2011 | premium unleaded (required) | 230.0 | 6.0 | MANUAL | rear wheel drive | 2.0 | Luxury,Performance | Compact | Coupe | 28 | 18 | 3916 | 29450 |
4 | BMW | 1 Series | 2011 | premium unleaded (required) | 230.0 | 6.0 | MANUAL | rear wheel drive | 2.0 | Luxury | Compact | Convertible | 28 | 18 | 3916 | 34500 |
# Show the dtype pandas inferred for each column (rendered as the cell output).
df.dtypes
Make                  object
Model                 object
Year                   int64
Engine Fuel Type      object
Engine HP            float64
Engine Cylinders     float64
Transmission Type     object
Driven_Wheels         object
Number of Doors      float64
Market Category       object
Vehicle Size          object
Vehicle Style         object
highway MPG            int64
city mpg               int64
Popularity             int64
MSRP                   int64
dtype: object
# Drop every row that has a missing value in ANY column, in place, so the
# feature and target extracted below are NaN-free and stay row-aligned.
# NOTE(review): this also discards rows whose only gap is in a column the
# regression never uses; consider df.dropna(subset=['Engine HP', 'MSRP'])
# if those rows should be kept (this would change the fitted coefficients).
df.dropna(inplace=True)

# Feature: engine horsepower, reshaped to an (n_samples, 1) column because the
# estimator's feature matrix must be 2-D.
HP = df['Engine HP']
HP_array = HP.to_numpy().reshape(-1, 1)

# Target: MSRP, kept as an (n_samples, 1) column to match the shape the rest of
# the notebook works with. No separate dropna() on this Series: the frame is
# already NaN-free, and dropping rows from the target alone would leave it
# shorter than (and misaligned with) HP_array.
Price = df['MSRP']
Price_array = Price.to_numpy().reshape(-1, 1)

# Scatter of price against horsepower; the axis window crops anything above
# 800 HP or a price of 100000.
plt.plot(HP_array, Price_array, 'o')
plt.axis([0, 800, 0, 100000])
plt.ylabel('''Retail Price (£)''')
plt.xlabel('Engine: Horse Power')
plt.show()
# Fit a linear regression predicting price (Price_array) from horsepower (HP_array).
line_fitter = LinearRegression()
# Left as a bare trailing expression so the notebook displays the fitted estimator.
line_fitter.fit(HP_array, Price_array)
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LinearRegression()
# Model's predicted price for each observed horsepower value.
PricePredict = line_fitter.predict(HP_array)

# Overlay the fitted line on the raw observations, using the same
# 0-800 HP by 0-100000 price window as the scatter plot above.
plt.xlabel('Engine: Horse Power')
plt.ylabel('''Retail Price (£)''')
plt.plot(HP_array, Price_array, 'o')
plt.plot(HP_array, PricePredict)
plt.xlim(0, 800)
plt.ylim(0, 100000)
plt.show()
# Report the fitted line as plain numbers.
# Because the model was fitted against an (n_samples, 1) target, coef_ and
# intercept_ come back as arrays; indexing coef_[0] still leaves a 1-element
# array, which prints with brackets ("[401.36...] * Horsepower + [-60160.4...]").
# Flattening and taking the first element yields scalars, and also works if
# the model is later fitted against a 1-D target.
gradient = float(np.ravel(line_fitter.coef_)[0])
y_intercept = float(np.ravel(line_fitter.intercept_)[0])
print("Gradient:", gradient)
print("Y-Intercept:", y_intercept)
print("Cost of Car = ", gradient, " * Horsepower + ", y_intercept)
Gradient: [401.36907559] Y-Intercept: [-60160.43766107] Cost of Car = [401.36907559] * Horsepower + [-60160.43766107]