from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings
# Suppress every warning category for the rest of the script.
warnings.filterwarnings(action='ignore')

# Read the rental listings from disk and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='ManhattanHousing.csv')
print(df.head(n=5))
# Output of print(df.head()):
#    rental_id   rent  bedrooms  bathrooms  size_sqft  min_to_subway  floor  \
# 0       1545   2550       0.0          1        480              9    2.0
# 1       2472  11500       2.0          2       2000              4    1.0
# 2       2919   4500       1.0          1        916              2   51.0
# 3       2790   4795       1.0          1        975              3    8.0
# 4       3946  17500       2.0          2       4800              3    4.0
#
#    building_age_yrs  no_fee  has_roofdeck  has_washer_dryer  has_doorman  \
# 0                17       1             1                 0            0
# 1                96       0             0                 0            0
# 2                29       0             1                 0            1
# 3                31       0             0                 0            1
# 4               136       0             0                 0            1
#
#    has_elevator  has_dishwasher  has_patio  has_gym       neighborhood  \
# 0             1               1          0        1    Upper East Side
# 1             0               0          0        0  Greenwich Village
# 2             1               1          0        0            Midtown
# 3             1               1          0        1  Greenwich Village
# 4             1               1          0        1               Soho
#
#      borough
# 0  Manhattan
# 1  Manhattan
# 2  Manhattan
# 3  Manhattan
# 4  Manhattan
# Columns used as predictors; the 'rent' column is the regression target.
features = [
    'bedrooms',
    'bathrooms',
    'size_sqft',
    'min_to_subway',
    'floor',
    'building_age_yrs',
]
df_features = df.loc[:, features]
df_rent = df.loc[:, ['rent']]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
training_data, test_data, training_rent, test_rent = train_test_split(
    df_features,
    df_rent,
    test_size=0.2,
    random_state=100,
)
# Fit a distance-weighted KNN regressor for every k from 1 to 100 and record
# the score each one reaches on the held-out test split.
accuracies = []
for k in range(1, 101):
    regressor = KNeighborsRegressor(n_neighbors=k, weights="distance")
    regressor.fit(training_data, training_rent)
    # For a regressor, score() reports the coefficient of determination (R^2),
    # not a classification accuracy.
    accuracies.append(regressor.score(test_data, test_rent))
# Plot the test-set score against the number of neighbours tried.
k_list = range(1, 101)
plt.plot(k_list, accuracies)

# Dashed red vertical marker at k = 22.
plt.axvline(22, linestyle='--', color='r')

plt.title('Regression Accuracy using K Nearest Neighbour Algorithm')
plt.xlabel('k (Number of Neighbours)')
plt.ylabel('Accuracy')
plt.show()
# Rebuild the model with 22 neighbours and train it on the training split.
regressor = KNeighborsRegressor(weights="distance", n_neighbors=22)
regressor.fit(X=training_data, y=training_rent)
# Output:
# KNeighborsRegressor(n_neighbors=22, weights='distance')
# (In a Jupyter environment, please rerun this cell to show the HTML
# representation or trust the notebook.)
# Predict rent for the held-out listings and compare with the true values.
predicted_rent = regressor.predict(test_data)

plt.scatter(x=test_rent, y=predicted_rent, alpha=0.4)
# Plotting the actual rent against itself draws the y = x reference line on
# which a perfect prediction would fall.
plt.plot(test_rent, test_rent, alpha=0.3)

plt.title("Comparing the actual rent with the predicted rent using K Nearest Neighbours")
plt.xlabel("Actual Rent Prices")
plt.ylabel("Predicted Rent Prices")
plt.show()
# Predict the rent for a single hypothetical listing.
# Values are given in the order of the training columns:
# bedrooms, bathrooms, size_sqft, min_to_subway, floor, building_age_yrs
# Wrapping the row in a DataFrame that reuses the training columns keeps the
# feature names the model was fitted with, instead of passing an unlabeled
# nested list whose column order nothing checks.
JimsFlat = pd.DataFrame([[2, 1, 620, 16, 5, 10]], columns=training_data.columns)
predict = regressor.predict(JimsFlat)

# predict is an array holding one value; extract it explicitly rather than
# relying on implicit array-to-scalar conversion inside the % format
# (deprecated by NumPy for arrays with ndim > 0).
jims_rent = float(np.ravel(predict)[0])
print("You could charge a rental price of: $%.2f" % jims_rent)
# Output:
# You could charge a rental price of: $3316.04