Connect with me using the links below:
LinkedIn
Resume
GitHub
Email
import pandas as pd
import warnings
from statsmodels.formula.api import ols as sm_ols
from statsmodels.iolib.summary2 import summary_col
# Load the Freddie Mac assignment sample while suppressing warnings raised
# during parsing (e.g. pandas DtypeWarning on wide, mixed-dtype CSVs).
# NOTE(review): the pasted source lost the with-block indentation (a syntax
# error as written); restored here — assumes only the read was inside the
# suppression context. TODO confirm against the original notebook.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    df = pd.read_csv("2023Fall_Freddie_Assignment_Data.csv")

# Show every column when displaying frames (the file has 33 columns).
pd.set_option("display.max_columns", None)
df.head()
oyear | v1 | v2 | v3 | v4 | v5 | v6 | v7 | v8 | v9 | v10 | v11 | v12 | v13 | v14 | v15 | v16 | v17 | v18 | v19 | v20 | v21 | v22 | v23 | v24 | v25 | v26 | v27 | v28 | v29 | v30 | v31 | v32 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1999 | 654 | 199906 | Y | 202905 | 23104.0 | 30 | 1 | P | 95 | 45 | 85000 | 95 | 7.000 | R | N | FRM | TX | SF | 76100 | F99Q20302845 | P | 360 | 2 | Other sellers | Other servicers | NaN | NaN | 9.0 | NaN | 9 | N | NaN |
1 | 1999 | 768 | 199907 | N | 201906 | NaN | 0 | 1 | P | 72 | 33 | 154000 | 72 | 7.125 | R | N | FRM | MN | SF | 55700 | F99Q20165628 | C | 240 | 1 | NORWEST MORTGAGE, INC. | WELLS FARGO HOME MORTGAGE, INC. | NaN | NaN | 9.0 | NaN | 9 | N | NaN |
2 | 1999 | 760 | 200002 | N | 201501 | NaN | 0 | 1 | P | 80 | 41 | 240000 | 80 | 7.625 | T | N | FRM | MN | SF | 55300 | F99Q40076986 | C | 180 | 2 | FIFTH THIRD BANK | Other servicers | NaN | NaN | 9.0 | NaN | 9 | N | NaN |
3 | 1999 | 756 | 200002 | N | 203001 | NaN | 0 | 1 | P | 23 | 21 | 20000 | 23 | 8.250 | T | N | FRM | MD | SF | 21600 | F99Q40230828 | P | 360 | 1 | NORWEST MORTGAGE, INC. | WELLS FARGO BANK, N.A. | NaN | NaN | 9.0 | NaN | 9 | N | NaN |
4 | 1999 | 738 | 199911 | N | 201410 | NaN | 0 | 1 | S | 80 | 28 | 71000 | 80 | 7.875 | R | N | FRM | MN | SF | 55800 | F99Q30198593 | P | 180 | 2 | NORWEST MORTGAGE, INC. | WELLS FARGO HOME MORTGAGE, INC. | NaN | NaN | 9.0 | NaN | 9 | N | NaN |
df.columns
Index(['oyear', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'v8', 'v9', 'v10',
'v11', 'v12', 'v13', 'v14', 'v15', 'v16', 'v17', 'v18', 'v19', 'v20',
'v21', 'v22', 'v23', 'v24', 'v25', 'v26', 'v27', 'v28', 'v29', 'v30',
'v31', 'v32'],
dtype='object')
# Human-readable labels for the anonymized columns v1..v32, in positional
# order, taken from the Freddie Mac Single-Family Loan-Level Dataset layout.
_DESCRIPTIVE_LABELS = [
    "CREDIT SCORE",
    "FIRST PAYMENT DATE",
    "FIRST TIME HOMEBUYER FLAG",
    "MATURITY DATE",
    "MSA",
    "MORTGAGE INSURANCE PERCENTAGE (MI %)",
    "NUMBER OF UNITS",
    "OCCUPANCY STATUS",
    "ORIGINAL COMBINED LOAN-TO-VALUE (CLTV)",
    "ORIGINAL DEBT-TO-INCOME (DTI) RATIO",
    "ORIGINAL UPB",
    "ORIGINAL LOAN-TO-VALUE (LTV)",
    "ORIGINAL INTEREST RATE",
    "CHANNEL",
    "PREPAYMENT PENALTY MORTGAGE (PPM) FLAG",
    "AMORTIZATION TYPE",
    "PROPERTY STATE",
    "PROPERTY TYPE",
    "POSTAL CODE",
    "LOAN SEQUENCE NUMBER",
    "LOAN PURPOSE",
    "ORIGINAL LOAN TERM",
    "NUMBER OF BORROWERS",
    "SELLER NAME",
    "SERVICER NAME",
    "SUPER CONFORMING FLAG",
    "PRE-RELIEF REFINANCE LOAN SEQUENCE NUMBER",
    "PROGRAM INDICATOR",
    "RELIEF REFINANCE INDICATOR",
    "PROPERTY VALUATION METHOD",
    "INTEREST ONLY INDICATOR (I/O INDICATOR)",
    "MI CANCELLATION INDICATOR",
]

# Map "v1".."v32" onto the labels above.
new_column_names = {
    f"v{position}": label
    for position, label in enumerate(_DESCRIPTIVE_LABELS, start=1)
}
# Apply the descriptive labels in place, then confirm the result.
df.rename(columns=new_column_names, inplace=True)
df.columns
Index(['oyear', 'CREDIT SCORE', 'FIRST PAYMENT DATE',
'FIRST TIME HOMEBUYER FLAG', 'MATURITY DATE', 'MSA',
'MORTGAGE INSURANCE PERCENTAGE (MI %)', 'NUMBER OF UNITS',
'OCCUPANCY STATUS', 'ORIGINAL COMBINED LOAN-TO-VALUE (CLTV)',
'ORIGINAL DEBT-TO-INCOME (DTI) RATIO', 'ORIGINAL UPB',
'ORIGINAL LOAN-TO-VALUE (LTV)', 'ORIGINAL INTEREST RATE', 'CHANNEL',
'PREPAYMENT PENALTY MORTGAGE (PPM) FLAG', 'AMORTIZATION TYPE',
'PROPERTY STATE', 'PROPERTY TYPE', 'POSTAL CODE',
'LOAN SEQUENCE NUMBER', 'LOAN PURPOSE', 'ORIGINAL LOAN TERM',
'NUMBER OF BORROWERS', 'SELLER NAME', 'SERVICER NAME',
'SUPER CONFORMING FLAG', 'PRE-RELIEF REFINANCE LOAN SEQUENCE NUMBER',
'PROGRAM INDICATOR', 'RELIEF REFINANCE INDICATOR',
'PROPERTY VALUATION METHOD', 'INTEREST ONLY INDICATOR (I/O INDICATOR)',
'MI CANCELLATION INDICATOR'],
dtype='object')
df["CREDIT SCORE"] = df["CREDIT SCORE"].replace(9999, pd.NA)
df["FIRST TIME HOMEBUYER FLAG"] = df["FIRST TIME HOMEBUYER FLAG"].replace("9",pd.NA)
df["MORTGAGE INSURANCE PERCENTAGE (MI %)"] = df["MORTGAGE INSURANCE PERCENTAGE (MI %)"].replace(999, pd.NA)
df["NUMBER OF UNITS"] = df["NUMBER OF UNITS"].replace(99, pd.NA)
df["OCCUPANCY STATUS"] = df["OCCUPANCY STATUS"].replace("9",pd.NA)
df["ORIGINAL COMBINED LOAN-TO-VALUE (CLTV)"] = df["ORIGINAL COMBINED LOAN-TO-VALUE (CLTV)"].replace(999, pd.NA)
df["ORIGINAL DEBT-TO-INCOME (DTI) RATIO"] = df["ORIGINAL DEBT-TO-INCOME (DTI) RATIO"].replace(999, pd.NA)
df["ORIGINAL LOAN-TO-VALUE (LTV)"] = df["ORIGINAL LOAN-TO-VALUE (LTV)"].replace(999, pd.NA)
df["CHANNEL"] = df["CHANNEL"].replace("9", pd.NA)
df["PROPERTY TYPE"] = df["PROPERTY TYPE"].replace("99", pd.NA)
df["LOAN PURPOSE"] = df["LOAN PURPOSE"].replace("9", pd.NA)
df["NUMBER OF BORROWERS"] = df["NUMBER OF BORROWERS"].replace(99, pd.NA)
df["PROGRAM INDICATOR"] = df["PROGRAM INDICATOR"].replace(9, pd.NA)
df["PROPERTY VALUATION METHOD"] = df["PROPERTY VALUATION METHOD"].replace(9, pd.NA)
df["MI CANCELLATION INDICATOR"] = df["MI CANCELLATION INDICATOR"].replace("7", pd.NA)
# Work on a copy so the cleaned raw frame is preserved.
df1 = df.copy()

# 0/1 indicator versions of the two Y/N flag columns
# (any value other than "Y"/"N" maps to NaN).
_YES_NO = {"Y": 1, "N": 0}
df1["First"] = df1["FIRST TIME HOMEBUYER FLAG"].map(_YES_NO)
df1["Penalty"] = df1["PREPAYMENT PENALTY MORTGAGE (PPM) FLAG"].map(_YES_NO)

# Short aliases for the variables used throughout the analysis below.
df1.rename(
    columns={
        "ORIGINAL INTEREST RATE": "Rate",
        "ORIGINAL UPB": "UPB",
        "ORIGINAL LOAN TERM": "Term",
        "ORIGINAL LOAN-TO-VALUE (LTV)": "LTV",
        "ORIGINAL DEBT-TO-INCOME (DTI) RATIO": "DTI",
        "CREDIT SCORE": "FICO",
        "NUMBER OF BORROWERS": "Borrowers",
    },
    inplace=True,
)

# These columns may hold pd.NA after the sentinel cleanup; coerce them to
# numeric dtype so describe()/corr()/mean() treat them correctly.
for _name in ("LTV", "DTI", "FICO", "Borrowers"):
    df1[_name] = pd.to_numeric(df1[_name], errors="coerce")
# Variables included in the descriptive-statistics table.
variables = [
    "Rate", "UPB", "Term",
    "LTV", "DTI", "FICO",
    "Borrowers", "First", "Penalty",
]

# describe(percentiles=[]) keeps only count/mean/std/min/50%/max;
# relabel the 50% row as "median" for readability.
summary_table = (
    df1[variables]
    .describe(percentiles=[])
    .rename(index={"50%": "median"})
)
summary_table
Rate | UPB | Term | LTV | DTI | FICO | Borrowers | First | Penalty | |
---|---|---|---|---|---|---|---|---|---|
count | 72750.000000 | 7.275000e+04 | 72750.000000 | 72749.000000 | 66972.000000 | 72613.000000 | 72735.000000 | 72698.00000 | 72750.000000 |
mean | 5.130097 | 2.035716e+05 | 314.051395 | 71.683776 | 33.842606 | 740.687824 | 1.557682 | 0.12955 | 0.001306 |
std | 1.423189 | 1.218134e+05 | 77.145301 | 18.859577 | 10.915987 | 52.472380 | 0.501678 | 0.33581 | 0.036113 |
min | 1.750000 | 8.000000e+03 | 60.000000 | 3.000000 | 1.000000 | 300.000000 | 1.000000 | 0.00000 | 0.000000 |
median | 4.989500 | 1.750000e+05 | 360.000000 | 75.000000 | 34.000000 | 751.000000 | 2.000000 | 0.00000 | 0.000000 |
max | 13.700000 | 1.200000e+06 | 480.000000 | 312.000000 | 65.000000 | 839.000000 | 4.000000 | 1.00000 | 1.000000 |
# Per-column display formats: rates/ratios to one decimal, dollar amounts
# and FICO with thousands separators, flag shares with extra precision.
_ONE_DP = '{:.1f}'
_THOUSANDS = '{:,.0f}'
formatting_rules = {
    "Rate": _ONE_DP,
    "UPB": _THOUSANDS,
    "Term": _THOUSANDS,
    "LTV": _ONE_DP,
    "DTI": _ONE_DP,
    "FICO": _THOUSANDS,
    "Borrowers": _ONE_DP,
    "First": '{:.2f}',
    "Penalty": '{:.3f}',
}
formatted_summary_table = summary_table.style.format(formatting_rules)
formatted_summary_table
Rate | UPB | Term | LTV | DTI | FICO | Borrowers | First | Penalty | |
---|---|---|---|---|---|---|---|---|---|
count | 72750.0 | 72,750 | 72,750 | 72749.0 | 66972.0 | 72,613 | 72735.0 | 72698.00 | 72750.000 |
mean | 5.1 | 203,572 | 314 | 71.7 | 33.8 | 741 | 1.6 | 0.13 | 0.001 |
std | 1.4 | 121,813 | 77 | 18.9 | 10.9 | 52 | 0.5 | 0.34 | 0.036 |
min | 1.8 | 8,000 | 60 | 3.0 | 1.0 | 300 | 1.0 | 0.00 | 0.000 |
median | 5.0 | 175,000 | 360 | 75.0 | 34.0 | 751 | 2.0 | 0.00 | 0.000 |
max | 13.7 | 1,200,000 | 480 | 312.0 | 65.0 | 839 | 4.0 | 1.00 | 1.000 |
variables = ["Rate",
"UPB",
"Term",
"LTV",
"DTI",
"FICO",
"Borrowers",
"First",
"Penalty"]
corr_matrix = df1[variables].corr().style.format("{:.2f}")
corr_matrix
Rate | UPB | Term | LTV | DTI | FICO | Borrowers | First | Penalty | |
---|---|---|---|---|---|---|---|---|---|
Rate | 1.00 | -0.32 | 0.18 | 0.05 | 0.05 | -0.30 | 0.04 | 0.00 | 0.04 |
UPB | -0.32 | 1.00 | 0.21 | 0.13 | 0.12 | 0.13 | 0.09 | 0.06 | -0.03 |
Term | 0.18 | 0.21 | 1.00 | 0.30 | 0.16 | -0.05 | -0.08 | 0.17 | -0.03 |
LTV | 0.05 | 0.13 | 0.30 | 1.00 | 0.15 | -0.13 | -0.07 | 0.26 | -0.02 |
DTI | 0.05 | 0.12 | 0.16 | 0.15 | 1.00 | -0.16 | -0.11 | 0.04 | -0.02 |
FICO | -0.30 | 0.13 | -0.05 | -0.13 | -0.16 | 1.00 | -0.03 | -0.01 | -0.01 |
Borrowers | 0.04 | 0.09 | -0.08 | -0.07 | -0.11 | -0.03 | 1.00 | -0.09 | 0.00 |
First | 0.00 | 0.06 | 0.17 | 0.26 | 0.04 | -0.01 | -0.09 | 1.00 | -0.01 |
Penalty | 0.04 | -0.03 | -0.03 | -0.02 | -0.02 | -0.01 | 0.00 | -0.01 | 1.00 |
df1["Purchase"] = (df1["LOAN PURPOSE"] == "P").astype(int)
df1["< 680"] = (df1["FICO"] < 680).astype(int)
df1["Single"] = (df1["Borrowers"] == 1).astype(int)
variables = ["oyear", "Rate", "Purchase", "UPB", "LTV", "DTI", "FICO", "< 680", "Single"]
df_subset = df1[variables]
grouped_means = df_subset.groupby("oyear").mean()
#Creating the row with the general averages for every variable
overall_mean = df_subset.mean()
#Adding the overall means row to the table
grouped_means.loc["Overall Average"] = overall_mean
#Printing the table
grouped_means
Rate | Purchase | UPB | LTV | DTI | FICO | < 680 | Single | |
---|---|---|---|---|---|---|---|---|
oyear | ||||||||
1999 | 7.337683 | 0.486000 | 113821.333333 | 71.449667 | 31.742957 | 715.204637 | 0.247333 | 0.346000 |
2000 | 8.122338 | 0.691667 | 124045.000000 | 72.887667 | 33.594336 | 716.345270 | 0.253333 | 0.384333 |
2001 | 6.877263 | 0.302667 | 135673.333333 | 70.973333 | 31.750000 | 720.714334 | 0.221333 | 0.341667 |
2002 | 6.392651 | 0.258333 | 143730.333333 | 69.056019 | 31.899796 | 722.652862 | 0.215333 | 0.334667 |
2003 | 5.507776 | 0.178333 | 145634.333333 | 66.001333 | 30.800136 | 730.778593 | 0.172000 | 0.343333 |
2004 | 5.667718 | 0.360667 | 154214.333333 | 68.930667 | 33.350746 | 722.479118 | 0.231333 | 0.374667 |
2005 | 5.812673 | 0.376000 | 170954.666667 | 69.568667 | 35.384536 | 723.030030 | 0.238000 | 0.417333 |
2006 | 6.391042 | 0.473000 | 176869.333333 | 69.732667 | 36.521397 | 722.490824 | 0.245667 | 0.432667 |
2007 | 6.366955 | 0.453000 | 183082.333333 | 71.422000 | 36.643906 | 726.013009 | 0.232333 | 0.465667 |
2008 | 6.032981 | 0.421333 | 205754.333333 | 70.190333 | 36.079208 | 743.551667 | 0.125667 | 0.473667 |
2009 | 4.985427 | 0.213333 | 215246.000000 | 66.443333 | 31.847752 | 760.279333 | 0.047667 | 0.388667 |
2010 | 4.716799 | 0.208333 | 208135.666667 | 68.954000 | 31.924307 | 759.773333 | 0.059000 | 0.404333 |
2011 | 4.426142 | 0.220000 | 205856.000000 | 69.086667 | 31.828558 | 760.425667 | 0.056333 | 0.390333 |
2012 | 3.744633 | 0.169667 | 205508.000000 | 76.430667 | 31.146231 | 758.083000 | 0.069000 | 0.421667 |
2013 | 3.927846 | 0.280000 | 202030.333333 | 75.462000 | 32.149458 | 751.729910 | 0.093333 | 0.448667 |
2014 | 4.308486 | 0.498333 | 208097.666667 | 74.337000 | 33.705002 | 745.990333 | 0.101333 | 0.490667 |
2015 | 3.981578 | 0.431667 | 226880.666667 | 73.478000 | 33.502922 | 750.394667 | 0.089667 | 0.479333 |
2016 | 3.789911 | 0.450333 | 236025.666667 | 73.257667 | 33.798460 | 748.457667 | 0.087000 | 0.495000 |
2017 | 4.188570 | 0.564000 | 229910.333333 | 75.075667 | 34.910597 | 745.468489 | 0.097667 | 0.525000 |
2018 | 4.745135 | 0.671333 | 229418.000000 | 75.840000 | 35.770812 | 746.669113 | 0.095667 | 0.544667 |
2019 | 4.244280 | 0.566667 | 255544.333333 | 76.193333 | 35.344678 | 750.195667 | 0.078667 | 0.537333 |
2020 | 3.193679 | 0.294333 | 290616.333333 | 71.143000 | 33.276092 | 758.419000 | 0.048333 | 0.502333 |
2021 | 2.971172 | 0.353333 | 291863.333333 | 69.328667 | 34.100667 | 750.557853 | 0.081333 | 0.540333 |
2022 | 5.089418 | 0.671000 | 302796.333333 | 73.870333 | 37.204667 | 743.823608 | 0.100667 | 0.543333 |
2023 | 6.330771 | 0.825333 | 299610.666667 | 76.872000 | 38.368000 | 748.901333 | 0.076000 | 0.617333 |
Overall Average | 5.130097 | 0.404110 | 203571.573883 | 71.683776 | 33.842606 | 740.687824 | 0.136371 | 0.444536 |
# Display formats for the yearly-averages table: rates/ratios to one
# decimal, dollar amounts and FICO with thousands separators, shares
# (Purchase, < 680, Single) to two decimals.
formatting_rules = dict.fromkeys(("Rate", "LTV", "DTI"), '{:.1f}')
formatting_rules.update(dict.fromkeys(("UPB", "FICO"), '{:,.0f}'))
formatting_rules.update(dict.fromkeys(("Purchase", "< 680", "Single"), '{:.2f}'))
formatted_grouped_means = grouped_means.style.format(formatting_rules)
formatted_grouped_means
Rate | Purchase | UPB | LTV | DTI | FICO | < 680 | Single | |
---|---|---|---|---|---|---|---|---|
oyear | ||||||||
1999 | 7.3 | 0.49 | 113,821 | 71.4 | 31.7 | 715 | 0.25 | 0.35 |
2000 | 8.1 | 0.69 | 124,045 | 72.9 | 33.6 | 716 | 0.25 | 0.38 |
2001 | 6.9 | 0.30 | 135,673 | 71.0 | 31.8 | 721 | 0.22 | 0.34 |
2002 | 6.4 | 0.26 | 143,730 | 69.1 | 31.9 | 723 | 0.22 | 0.33 |
2003 | 5.5 | 0.18 | 145,634 | 66.0 | 30.8 | 731 | 0.17 | 0.34 |
2004 | 5.7 | 0.36 | 154,214 | 68.9 | 33.4 | 722 | 0.23 | 0.37 |
2005 | 5.8 | 0.38 | 170,955 | 69.6 | 35.4 | 723 | 0.24 | 0.42 |
2006 | 6.4 | 0.47 | 176,869 | 69.7 | 36.5 | 722 | 0.25 | 0.43 |
2007 | 6.4 | 0.45 | 183,082 | 71.4 | 36.6 | 726 | 0.23 | 0.47 |
2008 | 6.0 | 0.42 | 205,754 | 70.2 | 36.1 | 744 | 0.13 | 0.47 |
2009 | 5.0 | 0.21 | 215,246 | 66.4 | 31.8 | 760 | 0.05 | 0.39 |
2010 | 4.7 | 0.21 | 208,136 | 69.0 | 31.9 | 760 | 0.06 | 0.40 |
2011 | 4.4 | 0.22 | 205,856 | 69.1 | 31.8 | 760 | 0.06 | 0.39 |
2012 | 3.7 | 0.17 | 205,508 | 76.4 | 31.1 | 758 | 0.07 | 0.42 |
2013 | 3.9 | 0.28 | 202,030 | 75.5 | 32.1 | 752 | 0.09 | 0.45 |
2014 | 4.3 | 0.50 | 208,098 | 74.3 | 33.7 | 746 | 0.10 | 0.49 |
2015 | 4.0 | 0.43 | 226,881 | 73.5 | 33.5 | 750 | 0.09 | 0.48 |
2016 | 3.8 | 0.45 | 236,026 | 73.3 | 33.8 | 748 | 0.09 | 0.49 |
2017 | 4.2 | 0.56 | 229,910 | 75.1 | 34.9 | 745 | 0.10 | 0.53 |
2018 | 4.7 | 0.67 | 229,418 | 75.8 | 35.8 | 747 | 0.10 | 0.54 |
2019 | 4.2 | 0.57 | 255,544 | 76.2 | 35.3 | 750 | 0.08 | 0.54 |
2020 | 3.2 | 0.29 | 290,616 | 71.1 | 33.3 | 758 | 0.05 | 0.50 |
2021 | 3.0 | 0.35 | 291,863 | 69.3 | 34.1 | 751 | 0.08 | 0.54 |
2022 | 5.1 | 0.67 | 302,796 | 73.9 | 37.2 | 744 | 0.10 | 0.54 |
2023 | 6.3 | 0.83 | 299,611 | 76.9 | 38.4 | 749 | 0.08 | 0.62 |
Overall Average | 5.1 | 0.40 | 203,572 | 71.7 | 33.8 | 741 | 0.14 | 0.44 |
df1 = df1.rename(columns={'SELLER NAME': 'Seller'})

# Loans per seller. groupby+size leaves sellers in alphabetical order, so
# the (stable) descending sort breaks frequency ties alphabetically.
seller_loan_counts = df1.groupby("Seller").size().reset_index(name="Frequency")

# Largest sellers first.
seller_loan_counts_sorted = seller_loan_counts.sort_values(by='Frequency', ascending=False)

# Each seller's share of all loans, in percent.
seller_loan_counts_sorted['Percent'] = (
    (seller_loan_counts_sorted['Frequency'] / seller_loan_counts_sorted['Frequency'].sum()) * 100
)

# Rank (1 = largest seller) and cumulative market share.
seller_loan_counts_sorted['Number'] = range(1, len(seller_loan_counts_sorted) + 1)
seller_loan_counts_sorted['Cumulative'] = seller_loan_counts_sorted['Percent'].cumsum()

# Column order for display, with the rank as the index.
seller_loan_counts_sorted = seller_loan_counts_sorted[
    ['Number', 'Seller', 'Frequency', 'Percent', "Cumulative"]
]
pd.set_option("display.max_rows", None)  # show every seller row
seller_loan_counts_sorted.set_index('Number', inplace=True)

# Two-decimal display for the percentage columns.
seller_loan_counts_sorted_formatted = seller_loan_counts_sorted.style.format(
    {"Percent": '{:.2f}', "Cumulative": '{:.2f}'}
)
seller_loan_counts_sorted_formatted
Seller | Frequency | Percent | Cumulative | |
---|---|---|---|---|
Number | ||||
1 | Other sellers | 18989 | 26.10 | 26.10 |
2 | WELLS FARGO BANK, N.A. | 10152 | 13.95 | 40.06 |
3 | WELLS FARGO HOME MORTGAGE, INC. | 3667 | 5.04 | 45.10 |
4 | BANK OF AMERICA, N.A. | 3557 | 4.89 | 49.99 |
5 | U.S. BANK N.A. | 3168 | 4.35 | 54.34 |
6 | ABN AMRO MORTGAGE GROUP, INC. | 2435 | 3.35 | 57.69 |
7 | JPMORGAN CHASE BANK, N.A. | 2092 | 2.88 | 60.56 |
8 | CHASE HOME FINANCE LLC | 1658 | 2.28 | 62.84 |
9 | BRANCH BANKING & TRUST COMPANY | 1511 | 2.08 | 64.92 |
10 | COUNTRYWIDE HOME LOANS, INC. | 1383 | 1.90 | 66.82 |
11 | QUICKEN LOANS INC. | 1306 | 1.80 | 68.62 |
12 | CITIMORTGAGE, INC. | 1095 | 1.51 | 70.12 |
13 | WASHINGTON MUTUAL BANK | 1075 | 1.48 | 71.60 |
14 | FIFTH THIRD BANK | 1073 | 1.47 | 73.07 |
15 | CHASE MANHATTAN MORTGAGE CORPORATION | 932 | 1.28 | 74.35 |
16 | AMERIHOME MORTGAGE COMPANY, LLC | 926 | 1.27 | 75.63 |
17 | JPMORGAN CHASE BANK, NATIONAL ASSOCIATION | 856 | 1.18 | 76.80 |
18 | NORWEST MORTGAGE, INC. | 837 | 1.15 | 77.95 |
19 | PRINCIPAL RESIDENTIAL MORTGAGE, INC. | 829 | 1.14 | 79.09 |
20 | CALIBER HOME LOANS, INC. | 801 | 1.10 | 80.20 |
21 | PROVIDENT FUNDING ASSOCIATES, L.P. | 783 | 1.08 | 81.27 |
22 | NATIONAL CITY MORTGAGE CO. | 751 | 1.03 | 82.30 |
23 | UNITED WHOLESALE MORTGAGE, LLC | 669 | 0.92 | 83.22 |
24 | LOANDEPOT.COM, LLC | 661 | 0.91 | 84.13 |
25 | FLAGSTAR BANK, FSB | 659 | 0.91 | 85.04 |
26 | TAYLOR, BEAN & WHITAKER MORTGAGE CORP. | 608 | 0.84 | 85.87 |
27 | PENNYMAC CORP. | 569 | 0.78 | 86.66 |
28 | GMAC MORTGAGE, LLC | 563 | 0.77 | 87.43 |
29 | SUNTRUST MORTGAGE, INC. | 552 | 0.76 | 88.19 |
30 | GMAC MORTGAGE CORPORATION | 506 | 0.70 | 88.88 |
31 | FAIRWAY INDEPENDENT MORTGAGE CORPORATION | 484 | 0.67 | 89.55 |
32 | GUARANTEED RATE, INC. | 427 | 0.59 | 90.14 |
33 | UNITED SHORE FINANCIAL SERVICES, LLC | 424 | 0.58 | 90.72 |
34 | NATIONSTAR MORTGAGE LLC DBA MR. COOPER | 415 | 0.57 | 91.29 |
35 | ROCKET MORTGAGE, LLC | 403 | 0.55 | 91.84 |
36 | QUICKEN LOANS, LLC | 328 | 0.45 | 92.29 |
37 | CITIZENS BANK, NA | 291 | 0.40 | 92.69 |
38 | NEWREZ LLC | 275 | 0.38 | 93.07 |
39 | FRANKLIN AMERICAN MORTGAGE COMPANY | 274 | 0.38 | 93.45 |
40 | NATIONSTAR MORTGAGE LLC | 241 | 0.33 | 93.78 |
41 | HOME POINT FINANCIAL CORPORATION | 228 | 0.31 | 94.09 |
42 | TRUIST BANK | 219 | 0.30 | 94.39 |
43 | METLIFE HOME LOANS, A DIVISION OF METLIFE BANK, N.A. | 216 | 0.30 | 94.69 |
44 | STEARNS LENDING, LLC | 215 | 0.30 | 94.99 |
45 | WASHINGTON MUTUAL BANK, F.A. | 204 | 0.28 | 95.27 |
46 | UNITED SHORE FINANCIAL SERVICES, LLC, DBA UNITED WHOLESALE M | 203 | 0.28 | 95.55 |
47 | PHH MORTGAGE CORPORATION | 177 | 0.24 | 95.79 |
48 | PENNYMAC LOAN SERVICES, LLC | 171 | 0.24 | 96.02 |
49 | CENDANT MORTGAGE CORPORATION, DBA PHH MORTGAGE SERVICES, INC | 166 | 0.23 | 96.25 |
50 | COUNTRYWIDE BANK, FSB | 155 | 0.21 | 96.47 |
51 | HSBC BANK USA | 136 | 0.19 | 96.65 |
52 | UNITED SHORE FINANCIAL SERVICES, LLC., DBA SHORE MORTGAGE | 118 | 0.16 | 96.82 |
53 | AMTRUST BANK | 117 | 0.16 | 96.98 |
54 | UNION SAVINGS BANK | 107 | 0.15 | 97.12 |
55 | LASALLE BANK, FSB | 95 | 0.13 | 97.25 |
56 | CMG MORTGAGE, INC. | 93 | 0.13 | 97.38 |
57 | PRIMELENDING A PLAINS CAPITAL CO | 92 | 0.13 | 97.51 |
58 | NATIONSBANK, N.A.,DBA BANK OF AMERICA MORTGAGE | 90 | 0.12 | 97.63 |
59 | PNC BANK, NA | 83 | 0.11 | 97.75 |
60 | PRIMELENDING, A PLAINSCAPITAL COMPANY | 82 | 0.11 | 97.86 |
61 | SUNTRUST BANK | 82 | 0.11 | 97.97 |
62 | FIRSTAR BANK, N.A. | 81 | 0.11 | 98.08 |
63 | U.S. BANK, N.A. | 81 | 0.11 | 98.19 |
64 | NATIONAL CITY MORTGAGE COMPANY | 79 | 0.11 | 98.30 |
65 | GUILD MORTGAGE COMPANY LLC | 68 | 0.09 | 98.40 |
66 | GE CAPITAL MORTGAGE SERVICES, INC. | 66 | 0.09 | 98.49 |
67 | FINANCE OF AMERICA MORTGAGE LLC | 65 | 0.09 | 98.58 |
68 | FIRST UNION MORTGAGE CORPORATION | 62 | 0.09 | 98.66 |
69 | CROSSCOUNTRY MORTGAGE, LLC | 51 | 0.07 | 98.73 |
70 | FIRST HORIZON HOME LOANS, A DIVISION OF FIRST TENNESSEE BANK | 51 | 0.07 | 98.80 |
71 | LAKEVIEW LOAN SERVICING, LLC | 48 | 0.07 | 98.87 |
72 | FLEET MORTGAGE CORPORATION | 45 | 0.06 | 98.93 |
73 | TEXAS CAPITAL BANK, N.A. | 43 | 0.06 | 98.99 |
74 | TEXAS CAPITAL BANK | 43 | 0.06 | 99.05 |
75 | USAA FEDERAL SAVINGS BANK | 43 | 0.06 | 99.11 |
76 | GREENLIGHT FINANCIAL SERVICES | 43 | 0.06 | 99.17 |
77 | STEARNS LENDING, INC. | 42 | 0.06 | 99.22 |
78 | NATIONSBANK, N.A. | 40 | 0.05 | 99.28 |
79 | BRANCH BANKING AND TRUST COMPANY | 35 | 0.05 | 99.33 |
80 | STONEGATE MORTGAGE CORPORATION | 34 | 0.05 | 99.37 |
81 | HSBC BANK | 33 | 0.05 | 99.42 |
82 | GUILD MORTGAGE COMPANY | 32 | 0.04 | 99.46 |
83 | WACHOVIA BANK, N.A. | 30 | 0.04 | 99.50 |
84 | FIFTH THIRD BANK, NATIONAL ASSOCIATION | 29 | 0.04 | 99.54 |
85 | AMERISAVE MORTGAGE CORPORATION | 26 | 0.04 | 99.58 |
86 | WITMER FUNDING, LLC | 25 | 0.03 | 99.61 |
87 | CHARTER ONE BANK, N. A. | 25 | 0.03 | 99.65 |
88 | FIFTH THIRD MORTGAGE COMPANY | 21 | 0.03 | 99.68 |
89 | COLORADO FEDERAL SAVINGS BANK | 20 | 0.03 | 99.70 |
90 | STEARNS LENDING, LLC. | 19 | 0.03 | 99.73 |
91 | FLAGSTAR BANK, NATIONAL ASSOCIATION | 19 | 0.03 | 99.76 |
92 | CALIBER FUNDING LLC | 17 | 0.02 | 99.78 |
93 | GUARANTY BANK, SSB | 15 | 0.02 | 99.80 |
94 | FIRST HORIZON HOME LOAN CORPORATION | 14 | 0.02 | 99.82 |
95 | DIME SAVINGS BANK OF NEW YORK, FSB | 13 | 0.02 | 99.84 |
96 | CHICAGO MORTGAGE SOLUTIONS CORP DBA INTERBANK MORTGAGE CO. | 13 | 0.02 | 99.86 |
97 | MOVEMENT MORTGAGE, LLC | 12 | 0.02 | 99.87 |
98 | ALLY BANK | 12 | 0.02 | 99.89 |
99 | IMPAC MORTGAGE CORP. | 12 | 0.02 | 99.91 |
100 | PLANET HOME LENDING, LLC | 12 | 0.02 | 99.92 |
101 | NORTHPOINTE BANK | 10 | 0.01 | 99.94 |
102 | PROSPECT MORTGAGE, LLC | 10 | 0.01 | 99.95 |
103 | MARINE MIDLAND BANK | 9 | 0.01 | 99.96 |
104 | SOVEREIGN BANK | 7 | 0.01 | 99.97 |
105 | PACIFIC UNION FINANCIAL, LLC | 7 | 0.01 | 99.98 |
106 | DITECH FINANCIAL LLC | 5 | 0.01 | 99.99 |
107 | TAYLOR, BEAN & WHITAKER MORTGAGE CORPORATION | 5 | 0.01 | 99.99 |
108 | PULTE MORTGAGE LLC | 4 | 0.01 | 100.00 |
# Restrict the sample to 2012-2023 originations (between() is inclusive
# on both endpoints, matching the original >= / <= filter).
df2 = df1[df1['oyear'].between(2012, 2023)]

# OLS of Rate on loan characteristics plus origination-year fixed effects.
# Wrapping oyear in C() makes the formula engine generate year dummies
# automatically (2012 is the omitted base category).
regression_model = sm_ols('Rate ~ UPB + Term + LTV + DTI + FICO + C(oyear)', data=df2).fit()
Note that there is no need to create the year dummy variables manually in Python. When using sm_ols(), wrapping a variable in C() within the formula tells the function to treat it as categorical, so the dummy variables are generated automatically when the model is fit. Accordingly, the output below shows a coefficient for every year except the omitted base year, 2012.
# Footer rows appended below the coefficient table; each callable receives
# the fitted results object and returns the formatted statistic.
info_dict={'R-squared' : lambda x: f"{x.rsquared:.2f}",
'Adj R-squared' : lambda x: f"{x.rsquared_adj:.2f}",
'No. observations' : lambda x: f"{int(x.nobs):d}"}
# summary_col stacks one or more fitted models into a single text table.
print('='*33)
print(" y = Rate")
print(summary_col(results=[regression_model], # list of fitted results objects to tabulate
float_format='%0.6f',
stars = True, # significance stars: * p<.1, ** p<.05, *** p<.01
model_names=['Regression Model'], # column header; typically the y-variable name
info_dict=info_dict,
# Coefficient display order: intercept and loan characteristics first,
# then the year dummies created by C(oyear) (2013-2023; 2012 is the base).
regressor_order=[ 'Intercept','UPB','Term','LTV','DTI','FICO',
"C(oyear)[T.2013]", "C(oyear)[T.2014]", "C(oyear)[T.2015]",
"C(oyear)[T.2016]", "C(oyear)[T.2017]", "C(oyear)[T.2018]",
"C(oyear)[T.2019]", "C(oyear)[T.2020]", "C(oyear)[T.2021]",
"C(oyear)[T.2022]", "C(oyear)[T.2023]"]
)
)
=================================
y = Rate
=================================
Regression Model
---------------------------------
Intercept 3.963274***
(0.057749)
UPB -0.000001***
(0.000000)
Term 0.003772***
(0.000045)
LTV 0.002183***
(0.000182)
DTI 0.003759***
(0.000321)
FICO -0.002098***
(0.000068)
C(oyear)[T.2013] 0.188277***
(0.016187)
C(oyear)[T.2014] 0.502853***
(0.015619)
C(oyear)[T.2015] 0.212522***
(0.015351)
C(oyear)[T.2016] 0.031216**
(0.015226)
C(oyear)[T.2017] 0.393333***
(0.015249)
C(oyear)[T.2018] 0.913512***
(0.015235)
C(oyear)[T.2019] 0.438511***
(0.015182)
C(oyear)[T.2020] -0.500923***
(0.015103)
C(oyear)[T.2021] -0.747499***
(0.015148)
C(oyear)[T.2022] 1.263610***
(0.015339)
C(oyear)[T.2023] 2.476677***
(0.022475)
R-squared 0.686620
R-squared Adj. 0.686457
R-squared 0.69
Adj R-squared 0.69
No. observations 30851
=================================
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01
regression_model.summary()
Dep. Variable: | Rate | R-squared: | 0.687 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.686 |
Method: | Least Squares | F-statistic: | 4222. |
Date: | Tue, 05 Dec 2023 | Prob (F-statistic): | 0.00 |
Time: | 22:38:22 | Log-Likelihood: | -23466. |
No. Observations: | 30851 | AIC: | 4.697e+04 |
Df Residuals: | 30834 | BIC: | 4.711e+04 |
Df Model: | 16 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
Intercept | 3.9633 | 0.058 | 68.630 | 0.000 | 3.850 | 4.076 |
C(oyear)[T.2013] | 0.1883 | 0.016 | 11.632 | 0.000 | 0.157 | 0.220 |
C(oyear)[T.2014] | 0.5029 | 0.016 | 32.196 | 0.000 | 0.472 | 0.533 |
C(oyear)[T.2015] | 0.2125 | 0.015 | 13.844 | 0.000 | 0.182 | 0.243 |
C(oyear)[T.2016] | 0.0312 | 0.015 | 2.050 | 0.040 | 0.001 | 0.061 |
C(oyear)[T.2017] | 0.3933 | 0.015 | 25.794 | 0.000 | 0.363 | 0.423 |
C(oyear)[T.2018] | 0.9135 | 0.015 | 59.962 | 0.000 | 0.884 | 0.943 |
C(oyear)[T.2019] | 0.4385 | 0.015 | 28.884 | 0.000 | 0.409 | 0.468 |
C(oyear)[T.2020] | -0.5009 | 0.015 | -33.168 | 0.000 | -0.531 | -0.471 |
C(oyear)[T.2021] | -0.7475 | 0.015 | -49.347 | 0.000 | -0.777 | -0.718 |
C(oyear)[T.2022] | 1.2636 | 0.015 | 82.381 | 0.000 | 1.234 | 1.294 |
C(oyear)[T.2023] | 2.4767 | 0.022 | 110.196 | 0.000 | 2.433 | 2.521 |
UPB | -5.431e-07 | 2.29e-08 | -23.735 | 0.000 | -5.88e-07 | -4.98e-07 |
Term | 0.0038 | 4.45e-05 | 84.769 | 0.000 | 0.004 | 0.004 |
LTV | 0.0022 | 0.000 | 12.019 | 0.000 | 0.002 | 0.003 |
DTI | 0.0038 | 0.000 | 11.717 | 0.000 | 0.003 | 0.004 |
FICO | -0.0021 | 6.81e-05 | -30.805 | 0.000 | -0.002 | -0.002 |
Omnibus: | 2491.237 | Durbin-Watson: | 1.992 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 15192.203 |
Skew: | 0.065 | Prob(JB): | 0.00 |
Kurtosis: | 6.435 | Cond. No. | 5.72e+06 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.72e+06. This might indicate that there are
strong multicollinearity or other numerical problems.