8

import pandas as pd

import statsmodels.api as sm

import statsmodels.formula.api as smf

# Sample data: nine observations of salary, education level, and years of
# experience.

# Education levels, listed with High School first so that it is the first
# (base) category once the column is made categorical.
education_levels = ['High School', "Bachelor's", "Master's"]

salaries = [40000, 50000, 60000, 70000, 80000, 55000, 65000, 75000, 85000]
years_of_experience = [2, 3, 4, 5, 6, 7, 3, 5, 8]

data = pd.DataFrame({
    'Salary': salaries,
    # The sample rows cycle through the three levels, three times over.
    'Education': education_levels * 3,
    'Experience': years_of_experience,
})

# Convert Education to categorical with High School as base
education_dtype = pd.CategoricalDtype(categories=education_levels)
data['Education'] = data['Education'].astype(education_dtype)

# Fit regression model: Salary explained by Education (as a categorical
# term) plus Experience.
salary_ols = smf.ols('Salary ~ C(Education) + Experience', data=data)
model = salary_ols.fit()

print(model.summary())


# Comments