[text] B

Viewer

  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.compose import ColumnTransformer
  4. from sklearn.preprocessing import OneHotEncoder
  5. from sklearn.linear_model import LinearRegression
  6. from sklearn.metrics import mean_squared_error
  7.  
  8. # Load the data
  9. regression_data = pd.read_excel('your_file.xlsx', sheet_name='Regression Data')
  10.  
  11. # Separate features (X) and target variable (y)
  12. X = regression_data.drop(columns=['charges'])
  13. y = regression_data['charges']
  14.  
  15. # Split the data into train and test sets (80% train, 20% test)
  16. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  17.  
  18. # Identify categorical columns
  19. categorical_cols = [col for col in X_train.columns if X_train[col].dtype == 'object']
  20.  
  21. # Apply one-hot encoding to categorical columns
  22. preprocessor = ColumnTransformer(
  23.     transformers=[('cat', OneHotEncoder(), categorical_cols)],
  24.     remainder='passthrough')
  25.  
  26. # Fit and transform the training data
  27. X_train_encoded = preprocessor.fit_transform(X_train)
  28.  
  29. # Transform the test data
  30. X_test_encoded = preprocessor.transform(X_test)
  31.  
  32. # Train the model
  33. model = LinearRegression()
  34. model.fit(X_train_encoded, y_train)
  35.  
  36. # Predict on the test set
  37. y_pred = model.predict(X_test_encoded)
  38.  
  39. # Evaluate the model
  40. mse = mean_squared_error(y_test, y_pred)
  41. print("Mean Squared Error:", mse)

Editor

You can edit this paste and save as new:


File Description
  • B
  • Paste Code
  • 27 Apr-2024
  • 1.35 Kb
You can Share it: