In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
import kxy
In [2]:
# Training set of the Kaggle house-prices regression competition; get it from
# https://www.kaggle.com/c/house-prices-advanced-regression-techniques
# Despite its name, `url` holds a file name with no scheme, so the CSV is
# expected to sit next to this notebook (relative to the working directory).
url = 'kaggle_house_prices_advanced_regression.csv'
# Read the file and index the rows by their 'Id' column in a single chain.
df = pd.read_csv(url).set_index(['Id'])
In [3]:
y_column = 'SalePrice'
# Every column other than the target is treated as an explanatory variable.
x_columns = [column for column in df.columns if column != y_column]
# The Kaggle competition uses the RMSE of the log price as its evaluation
# metric, so the target is replaced by its natural log.
#
# The replacement overwrites the column in place, which used to make this cell
# non-idempotent: running it twice produced log(log(price)). A marker kept in
# `df.attrs` (metadata attached to this particular frame object) records that
# the transform has been applied, so re-running the cell is a no-op, while
# re-creating `df` (a fresh frame with empty `attrs`) enables it again.
if df.attrs.get('log_transformed_target') != y_column:
    df[y_column] = np.log(df[y_column])
    df.attrs['log_transformed_target'] = y_column
In [4]:
# Estimate the best performance achievable when predicting `y_column` from the
# remaining columns of `df`, framed as a regression problem. Categorical
# columns are one-hot encoded; the analysis is run in the 'dual' space.
# The call is the cell's last expression so the result table is displayed.
# NOTE(review): the saved output reports an achievable R^2 of 1 with an RMSE
# near 8e-10, which looks too good to be true -- confirm before relying on it.
df.kxy.achievable_performance_analysis(
    y_column,
    problem='regression',
    space='dual',
    categorical_encoding='one-hot',
)
Out[4]:
Achievable R^2 Achievable True Log-Likelihood Per Sample Achievable RMSE
1 -20.0342 7.95351e-10
In [5]:
# Model-free variable selection: orders the explanatory columns of `df` by how
# much they contribute to predicting `y_column`, i.e. shows where the useful
# signal in the data sits. Uses the same settings as the achievable-performance
# analysis (regression, 'dual' space, one-hot encoded categoricals).
# The call is the cell's last expression so the result table is displayed.
df.kxy.variable_selection_analysis(
    y_column,
    problem='regression',
    space='dual',
    categorical_encoding='one-hot',
)
Out[5]:
Selection Order Univariate Achievable R^2 Maximum Marginal R^2 Increase Running Achievable R^2 Running Achievable R^2 (%) Univariate Achievable RMSE Maximum Marginal RMSE Decrease Running Achievable RMSE Running Achievable RMSE/STD (%) Univariate Mutual Information (nats) Conditional Mutual Information (nats) Univariate Maximum True Log-Likelihood Increase Per Sample Maximum Marginal True Log-Likelihood Increase Per Sample Running Mutual Information (nats) Running Mutual Information (%) Running Maximum Log-Likelihood Increase Per Sample
Variable
OverallQual 1 0.626634 0.626634 0.626634 62.6634 0.243996 0.155319 0.243996 61.1037 0.492598 0.492598 0.492598 0.492598 0.492598 2.45878 0.492598
GrLivArea 2 0.506015 0.0893078 0.715942 71.5942 0.280655 0.0311728 0.212823 53.2971 0.352625 0.13669 0.352625 0.13669 0.629288 3.14106 0.629288
LotArea 3 0.203291 0.0638148 0.779757 77.9757 0.356423 0.0254245 0.187399 46.9301 0.113633 0.127223 0.113633 0.127223 0.756511 3.77609 0.756511
1stFlrSF 4 0.320133 0.0413487 0.821105 82.1105 0.329251 0.0185049 0.168894 42.2959 0.192929 0.103968 0.192929 0.103968 0.860479 4.29504 0.860479
GarageArea 5 0.411432 0.0293539 0.850459 85.0459 0.306347 0.0144769 0.154417 38.6705 0.265031 0.089614 0.265031 0.089614 0.950093 4.74235 0.950093
MSZoning 6 0.141194 0.0208002 0.871259 87.1259 0.370052 0.0111412 0.143276 35.8804 0.076106 0.074885 0.076106 0.074885 1.02498 5.11613 1.02498
FullBath 7 0.38991 0.0162532 0.887513 88.7513 0.311898 0.00934914 0.133927 33.5391 0.247074 0.067479 0.247074 0.067479 1.09246 5.45295 1.09246
GarageCars 8 0.458333 0.00701121 0.894524 89.4524 0.293888 0.0042409 0.129686 32.4771 0.306552 0.032178 0.306552 0.032178 1.12464 5.61357 1.12464
Neighborhood 9 0.0967085 0.0104547 0.904978 90.4978 0.379516 0.00659485 0.123091 30.8256 0.050855 0.052191 0.050855 0.052191 1.17683 5.87408 1.17683
Fireplaces 10 0.261519 0.00889586 0.913874 91.3874 0.343151 0.00590342 0.117188 29.3472 0.15158 0.049148 0.15158 0.049148 1.22597 6.1194 1.22597
BsmtFinSF1 11 0.0903087 0.00753889 0.921413 92.1413 0.380858 0.00524637 0.111941 28.0333 0.047325 0.045802 0.047325 0.045802 1.27178 6.34801 1.27178
BsmtFinType1 12 0.197221 0.00718809 0.928601 92.8601 0.357778 0.00524221 0.106699 26.7205 0.109838 0.047962 0.109838 0.047962 1.31974 6.58741 1.31974
BsmtQual 13 0.265618 0.0710339 0.999635 99.9635 0.342197 0.0990726 0.00762646 1.90989 0.154363 2.63839 0.154363 2.63839 3.95813 19.7568 3.95813
BsmtCond 14 0.0404453 0.000362903 0.999998 99.9998 0.391156 0.00708135 0.000545111 0.136512 0.020643 2.63839 0.020643 2.63839 6.59652 32.9262 6.59652
ExterQual 15 0.391831 4.74235e-07 0.999999 99.9999 0.311407 7.44433e-05 0.000470668 0.117869 0.248651 0.146837 0.248651 0.146837 6.74335 33.6592 6.74335
GarageType 16 0.202157 3.07885e-07 0.999999 99.9999 0.356676 5.54146e-05 0.000415253 0.103991 0.112922 0.125264 0.112922 0.125264 6.86862 34.2844 6.86862
GarageFinish 17 0.225895 1.0759e-06 1 100 0.35133 0.000385573 2.96808e-05 0.00743293 0.128024 2.63839 0.128024 2.63839 9.50701 47.4538 9.50701
GarageCond 18 0.118757 5.49661e-09 1 100 0.374855 2.75593e-05 2.12147e-06 0.000531278 0.063211 2.63839 0.063211 2.63839 12.1454 60.6232 12.1454
GarageQual 19 0.102365 2.80814e-11 1 100 0.378325 1.96984e-06 1.51635e-07 3.79738e-05 0.053996 2.63839 0.053996 2.63839 14.7838 73.7926 14.7838
YearRemodAdd 20 0.314475 4.35207e-14 1 100 0.330619 2.49556e-08 1.26679e-07 3.17242e-05 0.188785 0.179817 0.188785 0.179817 14.9636 74.6902 14.9636
FireplaceQu 21 0.265696 1.70974e-14 1 100 0.342179 1.12303e-08 1.15449e-07 2.89118e-05 0.154416 0.09283 0.154416 0.09283 15.0564 75.1535 15.0564
HeatingQC 22 0.225887 1.42109e-14 1 100 0.351332 1.02925e-08 1.05157e-07 2.63343e-05 0.128019 0.093379 0.128019 0.093379 15.1498 75.6196 15.1498
KitchenQual 23 0.327287 7.99361e-15 1 100 0.327515 6.24212e-09 9.89145e-08 2.47711e-05 0.198218 0.061195 0.198218 0.061195 15.211 75.9251 15.211
TotRmsAbvGrd 24 0.274391 1.53211e-14 1 100 0.340147 1.32012e-08 8.57133e-08 2.14651e-05 0.160372 0.143248 0.160372 0.143248 15.3543 76.6401 15.3543
YearBuilt 25 0.414231 7.66054e-15 1 100 0.305618 7.39354e-09 7.83198e-08 1.96135e-05 0.267415 0.090208 0.267415 0.090208 15.4445 77.0904 15.4445
TotalBsmtSF 26 0.350294 9.76996e-15 1 100 0.321865 1.06982e-08 6.76216e-08 1.69344e-05 0.215618 0.146873 0.215618 0.146873 15.5913 77.8235 15.5913
HouseStyle 27 0.0829452 3.10862e-15 1 100 0.382396 3.83166e-09 6.37899e-08 1.59748e-05 0.043294 0.058332 0.043294 0.058332 15.6497 78.1146 15.6497
HalfBath 28 0.116154 6.77236e-15 1 100 0.375408 9.10794e-09 5.4682e-08 1.36939e-05 0.061736 0.154061 0.061736 0.154061 15.8037 78.8836 15.8037
GarageYrBlt 29 0.32121 3.55271e-15 1 100 0.32899 5.35929e-09 4.93227e-08 1.23518e-05 0.193722 0.10315 0.193722 0.10315 15.9069 79.3985 15.9069
MSSubClass 30 0.0404453 2.22045e-15 1 100 0.391156 3.73147e-09 4.55912e-08 1.14174e-05 0.020643 0.078669 0.020643 0.078669 15.9855 79.7912 15.9855
Foundation 31 0.30358 2.88658e-15 1 100 0.333236 5.3915e-09 4.01997e-08 1.00672e-05 0.180901 0.125855 0.180901 0.125855 16.1114 80.4194 16.1114
LotFrontage 32 0.171712 1.33227e-15 1 100 0.363418 2.74525e-09 3.74545e-08 9.37968e-06 0.094197 0.070734 0.094197 0.070734 16.1821 80.7724 16.1821
OpenPorchSF 33 0.22295 9.99201e-16 1 100 0.351998 2.32357e-09 3.51309e-08 8.79779e-06 0.126125 0.064045 0.126125 0.064045 16.2462 81.0921 16.2462
CentralAir 34 0.0967085 7.77156e-16 1 100 0.379516 1.77252e-09 3.33584e-08 8.3539e-06 0.050855 0.051772 0.050855 0.051772 16.298 81.3505 16.298
Exterior1st 35 0.136739 7.77156e-16 1 100 0.371011 1.98377e-09 3.13746e-08 7.85711e-06 0.073519 0.06131 0.073519 0.06131 16.3593 81.6565 16.3593
2ndFlrSF 36 0.201425 9.99201e-16 1 100 0.35684 2.61747e-09 2.87571e-08 7.20162e-06 0.112463 0.087113 0.112463 0.087113 16.4464 82.0914 16.4464
MasVnrType 37 0.156161 4.44089e-16 1 100 0.366814 1.22839e-09 2.75288e-08 6.894e-06 0.084897 0.043655 0.084897 0.043655 16.49 82.3093 16.49
MasVnrArea 38 0.174764 5.55112e-16 1 100 0.362748 1.71511e-09 2.58136e-08 6.46448e-06 0.096043 0.064328 0.096043 0.064328 16.5544 82.6304 16.5544
BsmtFullBath 39 0.0501014 4.44089e-16 1 100 0.389183 1.33647e-09 2.44772e-08 6.12979e-06 0.0257 0.053162 0.0257 0.053162 16.6075 82.8957 16.6075
LotShape 40 0.0988126 3.33067e-16 1 100 0.379073 1.10839e-09 2.33688e-08 5.85222e-06 0.052021 0.04634 0.052021 0.04634 16.6539 83.127 16.6539
WoodDeckSF 41 0.123428 2.22045e-16 1 100 0.37386 6.52203e-10 2.27166e-08 5.68889e-06 0.065868 0.028306 0.065868 0.028306 16.6822 83.2683 16.6822
SaleType 42 0.0934544 2.22045e-16 1 100 0.380199 9.03058e-10 2.18135e-08 5.46274e-06 0.049057 0.040565 0.049057 0.040565 16.7227 83.4708 16.7227
Electrical 43 0.0868179 2.22045e-16 1 100 0.381588 9.2581e-10 2.08877e-08 5.23089e-06 0.04541 0.043369 0.04541 0.043369 16.7661 83.6873 16.7661
PavedDrive 44 0.077299 1.11022e-16 1 100 0.383571 4.18722e-10 2.0469e-08 5.12603e-06 0.040225 0.02025 0.040225 0.02025 16.7864 83.7883 16.7864
BedroomAbvGr 45 0.054614 1.11022e-16 1 100 0.388258 3.16879e-10 2.01521e-08 5.04667e-06 0.028081 0.015602 0.028081 0.015602 16.802 83.8662 16.802
BsmtExposure 46 0.0585594 2.22045e-16 1 100 0.387447 6.50579e-10 1.95015e-08 4.88375e-06 0.030172 0.032816 0.030172 0.032816 16.8348 84.03 16.8348
SaleCondition 47 0.0906762 1.11022e-16 1 100 0.380781 7.88671e-10 1.87129e-08 4.68624e-06 0.047527 0.041282 0.047527 0.041282 16.8761 84.2361 16.8761
Exterior2nd 48 0.136468 1.11022e-16 1 100 0.371069 2.92198e-10 1.84207e-08 4.61307e-06 0.073362 0.015738 0.073362 0.015738 16.8918 84.3146 16.8918
EnclosedPorch 49 0.0475275 0 1 100 0.38971 2.32281e-10 1.81884e-08 4.5549e-06 0.024347 0.01269 0.024347 0.01269 16.9045 84.378 16.9045
Fence 50 0.0394815 1.11022e-16 1 100 0.391353 2.02663e-10 1.79857e-08 4.50414e-06 0.020141 0.011205 0.020141 0.011205 16.9157 84.4339 16.9157
BsmtUnfSF 51 0.0342536 1.11022e-16 1 100 0.392416 8.90137e-10 1.70956e-08 4.28123e-06 0.017427 0.050758 0.017427 0.050758 16.9664 84.6873 16.9664
BldgType 52 0.0188187 2.22045e-16 1 100 0.39554 6.16957e-10 1.64786e-08 4.12672e-06 0.009499 0.036756 0.009499 0.036756 17.0032 84.8707 17.0032
BsmtFinType2 53 0.0373602 1.11022e-16 1 100 0.391785 5.64866e-10 1.59138e-08 3.98526e-06 0.019038 0.03488 0.019038 0.03488 17.0381 85.0448 17.0381
RoofStyle 54 0.0233784 0 1 100 0.39462 2.13612e-10 1.57001e-08 3.93177e-06 0.011828 0.013514 0.011828 0.013514 17.0516 85.1123 17.0516
LotConfig 55 0.0230443 1.11022e-16 1 100 0.394687 3.74096e-10 1.53261e-08 3.83809e-06 0.011657 0.024116 0.011657 0.024116 17.0757 85.2326 17.0757
Condition1 56 0.0232456 0 1 100 0.394647 3.70492e-10 1.49556e-08 3.7453e-06 0.01176 0.024471 0.01176 0.024471 17.1002 85.3548 17.1002
Alley 57 0.0311505 1.11022e-16 1 100 0.393046 4.16074e-10 1.45395e-08 3.64111e-06 0.015823 0.028215 0.015823 0.028215 17.1284 85.4956 17.1284
ExterCond 58 0.0255636 0 1 100 0.394178 2.01905e-10 1.43376e-08 3.59054e-06 0.012948 0.013984 0.012948 0.013984 17.1424 85.5654 17.1424
KitchenAbvGr 59 0.0292536 2.22045e-16 1 100 0.393431 7.76607e-10 1.3561e-08 3.39606e-06 0.014845 0.055688 0.014845 0.055688 17.1981 85.8434 17.1981
Functional 60 0.0178036 0 1 100 0.395744 5.73101e-10 1.29879e-08 3.25254e-06 0.008982 0.04318 0.008982 0.04318 17.2412 86.0589 17.2412
ScreenPorch 61 0.00991452 1.11022e-16 1 100 0.397331 8.20697e-11 1.29058e-08 3.23199e-06 0.004982 0.006339 0.004982 0.006339 17.2476 86.0906 17.2476
OverallCond 62 0.0167718 0 1 100 0.395952 1.96655e-10 1.27092e-08 3.18274e-06 0.008457 0.015355 0.008457 0.015355 17.2629 86.1672 17.2629
BsmtFinSF2 63 0.00166461 1.11022e-16 1 100 0.398983 6.91286e-10 1.20179e-08 3.00962e-06 0.000833 0.055928 0.000833 0.055928 17.3189 86.4464 17.3189
LowQualFinSF 64 0.00417725 0 1 100 0.39848 2.02211e-10 1.18157e-08 2.95898e-06 0.002093 0.016969 0.002093 0.016969 17.3358 86.5311 17.3358
Heating 65 0.0146457 0 1 100 0.39638 1.01097e-10 1.17146e-08 2.93366e-06 0.007377 0.008593 0.007377 0.008593 17.3444 86.574 17.3444
LandSlope 66 0.0023652 0 1 100 0.398843 1.20609e-10 1.15939e-08 2.90346e-06 0.001184 0.010349 0.001184 0.010349 17.3548 86.6256 17.3548
RoofMatl 67 0.00715826 1.11022e-16 1 100 0.397883 1.00039e-10 1.14939e-08 2.87841e-06 0.003592 0.008666 0.003592 0.008666 17.3634 86.6689 17.3634
LandContour 68 0.0111494 0 1 100 0.397083 8.30523e-11 1.14109e-08 2.85761e-06 0.005606 0.007252 0.005606 0.007252 17.3707 86.7051 17.3707
Condition2 69 0.00313707 0 1 100 0.398688 4.78365e-11 1.1363e-08 2.84563e-06 0.001571 0.004201 0.001571 0.004201 17.3749 86.726 17.3749
Street 70 0.00191217 0 1 100 0.398933 5.41887e-12 1.13576e-08 2.84427e-06 0.000957 0.000477 0.000957 0.000477 17.3754 86.7284 17.3754
MoSold 71 0.00427484 0 1 100 0.398461 4.16965e-11 1.13159e-08 2.83383e-06 0.002142 0.003678 0.002142 0.003678 17.3791 86.7468 17.3791
PoolQC 72 0.00340619 0 1 100 0.398634 1.81136e-11 1.12978e-08 2.82929e-06 0.001706 0.001602 0.001706 0.001602 17.3807 86.7548 17.3807
PoolArea 73 0.00319887 7.77156e-16 1 100 0.398676 1.04903e-08 8.07525e-10 2.02227e-07 0.001602 2.63839 0.001602 2.63839 20.019 99.9242 20.019
MiscFeature 74 0.00427484 0 1 100 0.398461 1.37646e-12 8.06148e-10 2.01883e-07 0.002142 0.001706 0.002142 0.001706 20.0207 99.9327 20.0207
BsmtHalfBath 75 1.99998e-05 0 1 100 0.399311 1.61712e-12 8.04531e-10 2.01478e-07 1e-05 0.002008 1e-05 0.002008 20.0228 99.9427 20.0228
3SsnPorch 76 0.00406372 0 1 100 0.398503 3.92696e-12 8.00604e-10 2.00494e-07 0.002036 0.004893 0.002036 0.004893 20.0277 99.9671 20.0277
MiscVal 77 0.00400396 0 1 100 0.398515 1.30232e-12 7.99302e-10 2.00168e-07 0.002006 0.001628 0.002006 0.001628 20.0293 99.9753 20.0293
Utilities 78 0.000263965 0 1 100 0.399262 4.22719e-13 7.98879e-10 2.00062e-07 0.000132 0.000529 0.000132 0.000529 20.0298 99.9779 20.0298
YrSold 79 0.000789688 0 1 100 0.399157 3.52803e-12 7.95351e-10 1.99179e-07 0.000395 0.004426 0.000395 0.004426 20.0342 100 20.0342