import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

import warnings
# Install a blanket 'ignore' filter: with no category/module given, this
# silences every warning raised from here on.
# NOTE(review): this also hides deprecation and data-quality warnings from
# the libraries used below -- consider narrowing it to specific categories.
warnings.filterwarnings('ignore')


# Load the rental listings from the CSV that sits next to this script.
df = pd.read_csv(filepath_or_buffer='ManhattanHousing.csv')

# Peek at the first five rows (rendered as the cell output in a notebook;
# a no-op when the file is run as a plain script).
df.head(n=5)


# Predictor columns fed to the regression, one row per listing.
x = df.loc[:, [
    'bedrooms',
    'bathrooms',
    'size_sqft',
    'min_to_subway',
    'floor',
    'building_age_yrs',
    'no_fee',
    'has_roofdeck',
    'has_washer_dryer',
    'has_doorman',
    'has_elevator',
    'has_dishwasher',
    'has_patio',
    'has_gym',
]]

# Target is kept as a one-column DataFrame (not a Series), so the fitted
# coefficients and the predictions downstream are two-dimensional.
y = df.loc[:, ['rent']]

# 80/20 split; the fixed seed keeps the partition reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=6,
)


# Build the ordinary-least-squares model and fit it in one step
# (fit() returns the estimator itself).
mlr = LinearRegression().fit(x_train, y_train)

# Predicted rents for the held-out listings.
y_predict = mlr.predict(x_test)


# Predicted rent against actual rent for the held-out test listings.
plt.scatter(y_test, y_predict, alpha=0.4)
# Reference line y = x: a point lying on it was predicted exactly.
plt.plot(y_test, y_test, alpha=0.3)

plt.xlabel("Actual Rent Prices")
plt.ylabel("Predicted Rent Prices")

# Title spelling corrected ("Mutliple" -> "Multiple").
plt.title("Comparing the actual rent with the predicted rent using Multiple Linear Regression")

plt.show()


# Feature names followed by the fitted coefficients, one object per line so
# the two listings can be read side by side in the same column order.
print(x.columns, mlr.coef_, sep="\n")

Index(['bedrooms', 'bathrooms', 'size_sqft', 'min_to_subway', 'floor',
       'building_age_yrs', 'no_fee', 'has_roofdeck', 'has_washer_dryer',
       'has_doorman', 'has_elevator', 'has_dishwasher', 'has_patio',
       'has_gym'],
      dtype='object')
[[-302.73009383 1199.3859951     4.79976742  -24.28993151   24.19824177
    -7.58272473 -140.90664773   48.85017415  191.4257324  -151.11453388
    89.408889    -57.89714551  -19.31948556  -38.92369828]]


# Rent against floor area across the full dataset.
plt.scatter(df[['size_sqft']], df[['rent']], alpha=0.4)
plt.xlabel("Size - sq. feet")
plt.ylabel("Rental Prices")
# Call show(); the bare attribute `plt.show` only evaluates to the function
# object and never displays the figure.
plt.show()

<function matplotlib.pyplot.show(close=None, block=None)>


# Rent against floor level across the full dataset.
plt.scatter(df[['floor']], df[['rent']], alpha=0.4)
plt.xlabel("Floor Level of Flat")
plt.ylabel("Rental Prices")
# Call show(); the bare attribute `plt.show` only evaluates to the function
# object and never displays the figure.
plt.show()

<function matplotlib.pyplot.show(close=None, block=None)>


# Report the model's score() on the data it was fitted on and on the
# held-out data, each preceded by its label.
for label, (features, target) in (
    ("Train score:", (x_train, y_train)),
    ("Test score:", (x_test, y_test)),
):
    print(label)
    print(mlr.score(features, target))

Train score:
0.7725460559817883
Test score:
0.8050371975357635


# Feature values for a single flat, in the same order as the columns of `x`.
JimsFlat = [[2, 1, 620, 16, 1, 98, 1, 0, 1, 0, 0, 1, 1, 0]]

# The model was fitted on a DataFrame with named columns, so give the query
# row the same column labels instead of passing a bare nested list.
jims_features = pd.DataFrame(JimsFlat, columns=x.columns)

# Still a (1, 1) array: one sample, one target column.
predict = mlr.predict(jims_features)

# Pull out the scalar explicitly; formatting the whole array relies on
# implicit array-to-float conversion, which NumPy has deprecated.
print(f"You could charge a rental price of: ${predict[0, 0]:.2f}")

You could charge a rental price of: $2090.85

	rental_id	rent	bedrooms	bathrooms	size_sqft	min_to_subway	floor	building_age_yrs	no_fee	has_roofdeck	has_doorman	has_elevator	has_dishwasher	has_gym	neighborhood	borough
0	1545	2550	0.0	1	480	9	2.0	17	1	1	0	1	1	1	Upper East Side	Manhattan
1	2472	11500	2.0	2	2000	4	1.0	96	0	0	0	0	0	0	Greenwich Village	Manhattan
2	2919	4500	1.0	1	916	2	51.0	29	0	1	1	1	1	0	Midtown	Manhattan
3	2790	4795	1.0	1	975	3	8.0	31	0	0	1	1	1	1	Greenwich Village	Manhattan
4	3946	17500	2.0	2	4800	3	4.0	136	0	0	1	1	1	1	Soho	Manhattan

Multiple Linear Regression¶

Practice of Multiple Linear Regression using house rentals data in Manhattan, NY¶

Import Libraries, File and Inspect Data¶

Set up training and test data¶

Create and fit a Linear Regression model¶

Compare the actual rent prices to the ones predicted by the model¶

Inspecting the coefficients of the features¶

Seeing how some individual features correlate to the rental price¶

Checking the coefficient of determination, R² (it's above 0.7 on both the training and test sets, so a reasonably good fit)¶

Calculate a good rental price to charge for my flat¶