The following code block imports the required libraries:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
We read our data and separate the features from the response variable:
# Read the dataset from a CSV file in the current working directory.
df_autodata = pd.read_csv("autompg.csv")

# Fill NAs in horsepower with the column's median value.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: that chained in-place form is
# deprecated in recent pandas and does not modify the DataFrame when
# Copy-on-Write is enabled, which would silently leave the NaNs in place.
df_autodata['horsepower'] = df_autodata['horsepower'].fillna(
    df_autodata['horsepower'].median()
)

# Drop the carname variable; it is not used as a feature.
df_autodata.drop(columns=['carname'], inplace=True)

# Column 0 is taken as the response and columns 1-7 as the features.
# NOTE(review): presumably column 0 is the mpg target -- confirm against the
# column order of autompg.csv.
X = df_autodata.iloc[:, 1:8]
Y = df_autodata.iloc[:, 0]

# Convert to NumPy arrays so that rows can be selected by positional index.
X = np.array(X)
Y = np.array(Y)