8
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
# Sample data: nine observations of salary, education level and experience.
# The level list is ordered with 'High School' first so that it is the
# baseline (reference) category when the model dummy-codes Education.
education_levels = ['High School', "Bachelor's", "Master's"]
data = pd.DataFrame({
    'Salary': [
        40000, 50000, 60000, 70000, 80000, 55000, 65000, 75000, 85000,
    ],
    # The three levels cycle in the same order across all nine rows; the
    # column is created as a categorical with the explicit level order.
    'Education': pd.Categorical(
        education_levels * 3,
        categories=education_levels,
    ),
    'Experience': [2, 3, 4, 5, 6, 7, 3, 5, 8],
})

# Fit an ordinary least squares regression of Salary on the categorical
# Education term plus Experience, then print the fitted model's summary.
formula = 'Salary ~ C(Education) + Experience'
model = smf.ols(formula, data=data).fit()
print(model.summary())
# Comments
# Post a Comment