import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import warnings
# Silence every warning category for the rest of the session so library
# notices do not clutter the output.
warnings.filterwarnings(action='ignore')

# Load the rental listings from a CSV file in the working directory.
rentals_csv = 'ManhattanHousing.csv'
df = pd.read_csv(rentals_csv)

# Preview the first five rows (the bare expression is only rendered when run
# interactively, e.g. as the last statement of a notebook cell).
df.head(n=5)
 | rental_id | rent | bedrooms | bathrooms | size_sqft | min_to_subway | floor | building_age_yrs | no_fee | has_roofdeck | has_washer_dryer | has_doorman | has_elevator | has_dishwasher | has_patio | has_gym | neighborhood | borough |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1545 | 2550 | 0.0 | 1 | 480 | 9 | 2.0 | 17 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | Upper East Side | Manhattan |
1 | 2472 | 11500 | 2.0 | 2 | 2000 | 4 | 1.0 | 96 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Greenwich Village | Manhattan |
2 | 2919 | 4500 | 1.0 | 1 | 916 | 2 | 51.0 | 29 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | Midtown | Manhattan |
3 | 2790 | 4795 | 1.0 | 1 | 975 | 3 | 8.0 | 31 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | Greenwich Village | Manhattan |
4 | 3946 | 17500 | 2.0 | 2 | 4800 | 3 | 4.0 | 136 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | Soho | Manhattan |
# Predictor columns used by the model: every listing attribute except the id
# and the textual location columns (neighborhood, borough).
feature_columns = [
    'bedrooms',
    'bathrooms',
    'size_sqft',
    'min_to_subway',
    'floor',
    'building_age_yrs',
    'no_fee',
    'has_roofdeck',
    'has_washer_dryer',
    'has_doorman',
    'has_elevator',
    'has_dishwasher',
    'has_patio',
    'has_gym',
]
x = df[feature_columns]
# Kept as a single-column DataFrame (2-D) so that mlr.coef_ and
# mlr.predict(...) stay 2-D, which the code further down relies on.
y = df[['rent']]

# Hold out 20% of the listings for evaluation; the fixed seed makes the
# split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.8, test_size=0.2, random_state=6
)

# Fit an ordinary least-squares model on the training portion and predict
# rents for the held-out listings.
mlr = LinearRegression()
mlr.fit(x_train, y_train)
y_predict = mlr.predict(x_test)

# Predicted vs. actual rent. The y_test-vs-y_test line is the identity
# reference: points on it are perfect predictions.
plt.scatter(y_test, y_predict, alpha=0.4)
plt.plot(y_test, y_test, alpha=0.3)
plt.xlabel("Actual Rent Prices")
plt.ylabel("Predicted Rent Prices")
# Title spelling corrected ("Mutliple" -> "Multiple").
plt.title("Comparing the actual rent with the predicted rent using Multiple Linear Regression")
plt.show()

# Print the feature names followed by the fitted coefficients; the two are
# in the same column order.
print(x.columns)
print(mlr.coef_)
Index(['bedrooms', 'bathrooms', 'size_sqft', 'min_to_subway', 'floor', 'building_age_yrs', 'no_fee', 'has_roofdeck', 'has_washer_dryer', 'has_doorman', 'has_elevator', 'has_dishwasher', 'has_patio', 'has_gym'], dtype='object') [[-302.73009383 1199.3859951 4.79976742 -24.28993151 24.19824177 -7.58272473 -140.90664773 48.85017415 191.4257324 -151.11453388 89.408889 -57.89714551 -19.31948556 -38.92369828]]
# Scatter of apartment size against rent to eyeball how strongly the two
# are related.
plt.scatter(df['size_sqft'], df['rent'], alpha=0.4)
plt.xlabel("Size - sq. feet")
plt.ylabel("Rental Prices")
# plt.show must be *called*; a bare `plt.show` only evaluates to the function
# object and never renders the figure outside an inline notebook backend.
plt.show()
<function matplotlib.pyplot.show(close=None, block=None)>
# Scatter of floor level against rent to eyeball how strongly the two are
# related.
plt.scatter(df['floor'], df['rent'], alpha=0.4)
plt.xlabel("Floor Level of Flat")
plt.ylabel("Rental Prices")
# plt.show must be *called*; a bare `plt.show` only evaluates to the function
# object and never renders the figure outside an inline notebook backend.
plt.show()
<function matplotlib.pyplot.show(close=None, block=None)>
# Coefficient of determination (R^2) of the fitted model on the training
# data and on the held-out test data.
train_r2 = mlr.score(x_train, y_train)
test_r2 = mlr.score(x_test, y_test)

# Each label goes on its own line, followed by the score on the next line.
print("Train score:", train_r2, sep="\n")
print("Test score:", test_r2, sep="\n")
Train score: 0.7725460559817883 Test score: 0.8050371975357635
# Feature values for one hypothetical flat, in the same column order as `x`:
# bedrooms, bathrooms, size_sqft, min_to_subway, floor, building_age_yrs,
# no_fee, has_roofdeck, has_washer_dryer, has_doorman, has_elevator,
# has_dishwasher, has_patio, has_gym.
JimsFlat = [[2, 1, 620, 16, 1, 98, 1, 0, 1, 0, 0, 1, 1, 0]]

# Wrap the row in a DataFrame carrying the training column names so the
# prediction input matches what the model was fitted on.
jims_flat_features = pd.DataFrame(JimsFlat, columns=x.columns)
predict = mlr.predict(jims_flat_features)

# `predict` is a (1, 1) array because the model was fitted on a 2-D target.
# Pull out the single value explicitly: formatting the array itself relies on
# implicit array-to-scalar conversion, which NumPy deprecates for arrays with
# ndim > 0.
predicted_rent = predict.item()
print("You could charge a rental price of: $%.2f" % predicted_rent)
You could charge a rental price of: $2090.85