Using rental prices of apartments in Manhattan
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# Load the rental listings into a DataFrame and preview the first rows.
csv_path = 'ManhattanHousing.csv'
dataset = pd.read_csv(csv_path)
dataset.head()
rental_id | rent | bedrooms | bathrooms | size_sqft | min_to_subway | floor | building_age_yrs | no_fee | has_roofdeck | has_washer_dryer | has_doorman | has_elevator | has_dishwasher | has_patio | has_gym | neighborhood | borough | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1545 | 2550 | 0.0 | 1 | 480 | 9 | 2.0 | 17 | 1 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | Upper East Side | Manhattan |
1 | 2472 | 11500 | 2.0 | 2 | 2000 | 4 | 1.0 | 96 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | Greenwich Village | Manhattan |
2 | 2919 | 4500 | 1.0 | 1 | 916 | 2 | 51.0 | 29 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | Midtown | Manhattan |
3 | 2790 | 4795 | 1.0 | 1 | 975 | 3 | 8.0 | 31 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | Greenwich Village | Manhattan |
4 | 3946 | 17500 | 2.0 | 2 | 4800 | 3 | 4.0 | 136 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | Soho | Manhattan |
I didn't have to remove the neighborhood column; one-hot encoding it with `pd.get_dummies` would have been useful. However, for the purposes of this practice exercise I was happy to drop it.
# Prepare the data: drop the identifier and location columns, separate the
# target ("rent") from the predictors, split into train/test sets, and
# standardize the numeric columns.
dataset = dataset.drop(["rental_id", "neighborhood", "borough"], axis=1)
# pop() removes "rent" from dataset in place and returns it as the target.
labels = dataset.pop("rent")
# One-hot encodes any remaining categorical columns.
features = pd.get_dummies(dataset)
from sklearn.model_selection import train_test_split
# random_state pins the shuffle so the same rows land in the train and test
# sets on every run; without it the reported metrics are not comparable
# between runs.
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.20, random_state=42
)
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer  # imported but not used in the visible code
from sklearn.compose import ColumnTransformer
numerical_features = features.select_dtypes(include=['float64', 'int64'])
numerical_columns = numerical_features.columns
# Standardize the float64/int64 columns; any other column passes through unchanged.
ct = ColumnTransformer(
    [("only numeric", StandardScaler(), numerical_columns)],
    remainder='passthrough',
)
# Fit the scaler on the training split only, then reuse those statistics for
# the test split so no test information leaks into preprocessing.
features_train_scaled = ct.fit_transform(features_train)
features_test_scaled = ct.transform(features_test)
# Build, compile and train a small regression network:
# one 64-unit ReLU hidden layer followed by a single linear output unit.
from tensorflow.keras.models import Sequential
model = Sequential()
from tensorflow.keras.layers import InputLayer
# Named input_layer rather than `input` so the builtin input() is not
# shadowed for the rest of the session.
input_layer = InputLayer(input_shape=(features.shape[1],))
model.add(input_layer)
from tensorflow.keras.layers import Dense
model.add(Dense(64, activation="relu"))
# Single output unit with no activation: the prediction is an unbounded real value.
model.add(Dense(1))
from tensorflow.keras.optimizers import Adam
# NOTE(review): the recorded training log shows the loss fluctuating from
# epoch to epoch rather than steadily decreasing; a smaller learning rate
# may train more smoothly -- confirm by experiment before changing.
opt = Adam(learning_rate=0.1)
# Optimize mean squared error; also report mean absolute error.
model.compile(loss='mse', metrics=['mae'], optimizer=opt)
model.fit(features_train_scaled, labels_train, epochs=20, batch_size=10, verbose=1)
Epoch 1/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 271us/step - loss: 15079048.0000 - mae: 2767.0586 Epoch 2/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 254us/step - loss: 2344400.7500 - mae: 993.8212 Epoch 3/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 255us/step - loss: 2446848.7500 - mae: 997.0007 Epoch 4/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 255us/step - loss: 2236830.0000 - mae: 956.8112 Epoch 5/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 255us/step - loss: 2127814.7500 - mae: 928.3338 Epoch 6/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 254us/step - loss: 2130956.5000 - mae: 909.3269 Epoch 7/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 256us/step - loss: 2413626.2500 - mae: 964.5193 Epoch 8/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 255us/step - loss: 2050661.2500 - mae: 904.1806 Epoch 9/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 254us/step - loss: 1997017.0000 - mae: 890.1552 Epoch 10/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 255us/step - loss: 2249668.0000 - mae: 929.7765 Epoch 11/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 253us/step - loss: 2233610.5000 - mae: 898.0870 Epoch 12/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 253us/step - loss: 2205218.7500 - mae: 929.3832 Epoch 13/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 254us/step - loss: 2125105.5000 - mae: 909.5575 Epoch 14/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 255us/step - loss: 2594584.5000 - mae: 954.1763 Epoch 15/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 256us/step - loss: 2310330.2500 - mae: 926.9595 Epoch 16/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 253us/step - loss: 2161254.7500 - mae: 903.6918 Epoch 17/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 258us/step - loss: 1988439.7500 - mae: 882.6811 Epoch 18/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 258us/step - loss: 2355149.0000 - mae: 944.4242 Epoch 19/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 253us/step - loss: 2118154.0000 - mae: 908.9061 Epoch 20/20 284/284 ━━━━━━━━━━━━━━━━━━━━ 0s 254us/step - loss: 2390192.7500 - mae: 957.8502
<keras.src.callbacks.history.History at 0x17578b790>
# Score the trained model on the held-out test split and print the two
# values returned by evaluate(), unpacked as (res_mse, res_mae).
test_scores = model.evaluate(features_test_scaled, labels_test, verbose=0)
res_mse, res_mae = test_scores
print(res_mse, res_mae)
2163969.75 912.4317626953125
import numpy as np
# Describe the apartment by column name instead of by position, so a change
# in the training column order cannot silently assign a value to the wrong
# feature.
my_house = {
    "bedrooms": 2,
    "bathrooms": 2,
    "size_sqft": 600,
    "min_to_subway": 2,
    "floor": 10,
    "building_age_yrs": 25,
    "no_fee": 1,
    "has_roofdeck": 1,
    "has_washer_dryer": 1,
    "has_doorman": 0,
    "has_elevator": 1,
    "has_dishwasher": 1,
    "has_patio": 0,
    "has_gym": 1,
}
# Lay the values out in the exact order of the training columns; a training
# column missing from my_house raises KeyError instead of producing a
# misaligned row.
my_house_features = np.array([[my_house[column] for column in features.columns]])
my_house_features_df = pd.DataFrame(my_house_features, columns=features.columns)
# Apply the scaling fitted on the training data before predicting.
my_house_feature_scaled = ct.transform(my_house_features_df)
predicted_rent = model.predict(my_house_feature_scaled)
print("Predicted rent for the house:", predicted_rent)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 14ms/step Predicted rent for the house: [[3917.4375]]