Mini Project (BDA) Output
Mini Project (BDA) Output
ipynb - Colab
except Exception as e:
print(f"Error loading data from Kaggle: {e}")
print("Falling back to URL loading...")
try:
# Fallback to GitHub URLs
train_url = "https://raw.githubusercontent.com/suvikramsain/Bigmart-Sales/master/Train.csv"
test_url = "https://raw.githubusercontent.com/suvikramsain/Bigmart-Sales/master/Test.csv"
train = pd.read_csv(train_url)
test = pd.read_csv(test_url)
print("Data loaded successfully from GitHub URLs")
except:
print("Error loading data from URLs. Using local files if available.")
try:
train = pd.read_csv('Train.csv')
test = pd.read_csv('Test.csv')
print("Data loaded successfully from local files")
except:
https://colab.research.google.com/drive/1nQacKbOQQd-CivRD_lxjOwG0_1Eg0wf0#scrollTo=dx1pZ_GLtDdh&printMode=true 1/5
4/22/25, 1:36 AM Untitled4.ipynb - Colab
print("Error: Unable to load data. Please check data availability.")
import sys
sys.exit(1)
Train data columns: ['Item_Identifier', 'Item_Weight', 'Item_Fat_Content', 'Item_Visibility', 'Item_Type', 'Item_MRP', 'Outlet_Ident
Outlet_Type Item_Outlet_Sales
0 Supermarket Type1 3735.1380
1 Supermarket Type2 443.4228
2 Supermarket Type1 2097.2700
3 Grocery Store 732.3800
4 Supermarket Type1 994.7052
https://colab.research.google.com/drive/1nQacKbOQQd-CivRD_lxjOwG0_1Eg0wf0#scrollTo=dx1pZ_GLtDdh&printMode=true 2/5
4/22/25, 1:36 AM Untitled4.ipynb - Colab
Item_MRP 0
Outlet_Identifier 0
Outlet_Establishment_Year 0
Outlet_Size 1606
Outlet_Location_Type 0
Outlet_Type 0
dtype: int64
# Data preprocessing
# Fill missing values.
# The filled Series is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that chained in-place form
# emits a FutureWarning on recent pandas and leaves `combined` untouched
# once Copy-on-Write is enabled, so the NaNs would silently survive.
# Item_Weight gaps are replaced with the column mean.
combined['Item_Weight'] = combined['Item_Weight'].fillna(combined['Item_Weight'].mean())
# Outlet_Size gaps get an explicit 'Unknown' placeholder category.
combined['Outlet_Size'] = combined['Outlet_Size'].fillna('Unknown')
# Feature Engineering
# Replace the establishment year with the outlet's age in years, measured
# against a fixed reference year so results are reproducible across runs.
current_year = 2025
combined['Outlet_Years'] = current_year - combined['Outlet_Establishment_Year']
combined.drop('Outlet_Establishment_Year', axis=1, inplace=True)
# Normalize Item_Visibility by its maximum so the largest value becomes 1.
# NOTE(review): assumes the column maximum is non-zero - confirm.
combined['Item_Visibility'] = combined['Item_Visibility'] / combined['Item_Visibility'].max()
https://colab.research.google.com/drive/1nQacKbOQQd-CivRD_lxjOwG0_1Eg0wf0#scrollTo=dx1pZ_GLtDdh&printMode=true 3/5
4/22/25, 1:36 AM Untitled4.ipynb - Colab
RandomForestRegressor(max_depth=10, random_state=42)
# Feature importance
# Pair every training column with the importance score reported by the
# fitted model `rf`, then rank the rows from most to least important.
feature_importances = pd.DataFrame(
    data={
        'Feature': X_train.columns,
        'Importance': rf.feature_importances_,
    },
).sort_values(by='Importance', ascending=False)
# Signal the end of the notebook run.
print("\nAnalysis completed!")
https://colab.research.google.com/drive/1nQacKbOQQd-CivRD_lxjOwG0_1Eg0wf0#scrollTo=dx1pZ_GLtDdh&printMode=true 4/5
4/22/25, 1:36 AM Untitled4.ipynb - Colab
Analysis completed!
https://colab.research.google.com/drive/1nQacKbOQQd-CivRD_lxjOwG0_1Eg0wf0#scrollTo=dx1pZ_GLtDdh&printMode=true 5/5