from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation and data-validation warnings
# raised by the libraries used below.
warnings.simplefilter("ignore")


# Load the rental listings from the CSV in the working directory and show
# the first five rows as a quick sanity check of the columns.
df = pd.read_csv('ManhattanHousing.csv')

preview = df.head()
print(preview)

   rental_id   rent  bedrooms  bathrooms  size_sqft  min_to_subway  floor  \
0       1545   2550       0.0          1        480              9    2.0   
1       2472  11500       2.0          2       2000              4    1.0   
2       2919   4500       1.0          1        916              2   51.0   
3       2790   4795       1.0          1        975              3    8.0   
4       3946  17500       2.0          2       4800              3    4.0   

   building_age_yrs  no_fee  has_roofdeck  has_washer_dryer  has_doorman  \
0                17       1             1                 0            0   
1                96       0             0                 0            0   
2                29       0             1                 0            1   
3                31       0             0                 0            1   
4               136       0             0                 0            1   

   has_elevator  has_dishwasher  has_patio  has_gym       neighborhood  \
0             1               1          0        1    Upper East Side   
1             0               0          0        0  Greenwich Village   
2             1               1          0        0            Midtown   
3             1               1          0        1  Greenwich Village   
4             1               1          0        1               Soho   

     borough  
0  Manhattan  
1  Manhattan  
2  Manhattan  
3  Manhattan  
4  Manhattan


# Columns used as model inputs.
features = [
    'bedrooms',
    'bathrooms',
    'size_sqft',
    'min_to_subway',
    'floor',
    'building_age_yrs',
]

# Inputs: one column per entry in `features`.
df_features = df.loc[:, features]

# Target: kept as a single-column DataFrame (2-D) rather than a Series.
df_rent = df.loc[:, ['rent']]


# Hold out 20% of the listings for testing; the fixed seed makes the split
# reproducible between runs.
training_data, test_data, training_rent, test_rent = train_test_split(
    df_features,
    df_rent,
    test_size=0.2,
    random_state=100,
)


# Sweep k from 1 to 100, fit a distance-weighted KNN regressor for each value
# and record its score on the held-out data.
# NOTE(review): k is tuned against the test split here, so the score reported
# for the chosen k is optimistic; a separate validation split or
# cross-validation would give an unbiased estimate.
k_list = range(1, 101)

accuracies = []
for k in k_list:
    regressor = KNeighborsRegressor(n_neighbors=k, weights="distance")
    regressor.fit(training_data, training_rent)
    # For a regressor, score() returns the coefficient of determination (R²),
    # not a classification accuracy.
    accuracies.append(regressor.score(test_data, test_rent))

plt.plot(k_list, accuracies)
# Dashed marker at k = 22, the value used for the final model.
plt.axvline(x=22, color='r', linestyle='--')
plt.xlabel('k (Number of Neighbours)')
plt.ylabel('R² score')
plt.title('Regression Accuracy using K Nearest Neighbour Algorithm')
plt.show()


# Final model: 22 neighbours, with closer neighbours weighted more heavily.
# fit() returns the estimator itself, so construction and training are chained.
regressor = KNeighborsRegressor(
    n_neighbors=22,
    weights="distance",
).fit(training_data, training_rent)

KNeighborsRegressor(n_neighbors=22, weights='distance')

KNeighborsRegressor(n_neighbors=22, weights='distance')


# Predict rents for the held-out listings with the fitted model.
predicted_rent = regressor.predict(test_data)

# Predicted vs. actual rent. The faint line plots actual against actual
# (y = x), i.e. where a perfect prediction would fall.
plt.title("Comparing the actual rent with the predicted rent using K Nearest Neighbours")
plt.xlabel("Actual Rent Prices")
plt.ylabel("Predicted Rent Prices")

plt.scatter(test_rent, predicted_rent, alpha=0.4)
plt.plot(test_rent, test_rent, alpha=0.3)

plt.show()


# Values are listed in the same order as `features`:
# bedrooms, bathrooms, size_sqft, min_to_subway, floor, building_age_yrs
JimsFlat = [[2, 1, 620, 16, 5, 10]]

# The model was fitted on a DataFrame, so pass the query as a DataFrame with
# the same column names instead of a bare list; this keeps the feature order
# explicit and avoids scikit-learn's "X does not have valid feature names"
# warning.
jims_flat_features = pd.DataFrame(JimsFlat, columns=features)

predict = regressor.predict(jims_flat_features)

# `predict` is an array holding a single value (2-D because the training
# target is a one-column DataFrame). Extract the scalar explicitly rather than
# relying on implicit array-to-float conversion, which NumPy has deprecated
# for arrays with ndim > 0.
estimated_rent = float(np.ravel(predict)[0])

print(f"You could charge a rental price of: ${estimated_rent:.2f}")

You could charge a rental price of: $3316.04

K Nearest Neighbours (Regression): Manhattan Rental Prices

Practising the K Nearest Neighbours algorithm by predicting rental prices from existing data

Import libraries

Import the file and inspect the data

Create two smaller dataframes, one with the features and one with the rent

Split the data into training and test data

Compare the model's score on the test data for each value of K between 1 and 100

The model is most accurate with a K value of 22

Set up the regressor using this K value of 22

Compare the actual rent prices to the ones predicted by the model

Calculate a good rental price to charge for my flat