File tree 1 file changed +34
-0
lines changed
1 file changed +34
-0
lines changed Original file line number Diff line number Diff line change
1
+ # Eda-regression Makefile
2
+
3
# Pipeline settings.
# The directory values end in a slash so they can be joined directly with a
# file name. Simple (:=) assignment is used because the values are literals.
DATA_FILE    := eda-regression.csv
OUTPUT_DIR   := output/
MODEL_OUTPUT := model_output/
7
+
8
# Step 1: Import data.
# Downloads the gzip-compressed dataset from S3 into $(OUTPUT_DIR) and
# decompresses it in place, leaving $(OUTPUT_DIR)$(DATA_FILE).
# Variables must be written $(NAME): a bare $NAME expands only the
# one-letter variable $(N) followed by the literal text "AME".
# NOTE(review): the object key is assumed to be $(DATA_FILE).gz, because the
# gzip step needs a .gz file in $(OUTPUT_DIR) -- confirm against the bucket.
.PHONY: import-data
import-data:
	mkdir -p $(OUTPUT_DIR)
	aws s3 cp s3://your-bucket-name/$(DATA_FILE).gz $(OUTPUT_DIR)
	# -f overwrites a previously decompressed copy so re-runs do not fail.
	gzip -df $(OUTPUT_DIR)$(DATA_FILE).gz
12
+
13
# Step 2: Clean data (e.g. handle missing values, convert datatypes).
# Runs clean_data.py on the imported dataset; import-data is run first.
# Declared phony because the target names a command, not a file it creates.
.PHONY: clean-data
clean-data: import-data
	python clean_data.py $(OUTPUT_DIR)$(DATA_FILE)
16
+
17
# Step 3: Explore data using EDA tools (e.g. correlation matrix, histograms).
# Runs eda.py on the dataset. The clean-data prerequisite guarantees that the
# import and cleaning steps have run before eda.py reads the file, both for a
# standalone `make eda` and under `make -j`.
.PHONY: eda
eda: clean-data
	python eda.py $(OUTPUT_DIR)$(DATA_FILE)
20
+
21
# Step 4: Split data into training and testing sets.
# Runs split_data.py with the dataset path and the argument 0.8, after
# clean-data and eda have completed.
# NOTE(review): 0.8 is presumably the training fraction -- confirm in
# split_data.py.
.PHONY: split-data
split-data: clean-data eda
	python split_data.py $(OUTPUT_DIR)$(DATA_FILE) 0.8
24
+
25
# Step 5: Train a machine learning model (e.g. linear regression).
# Runs train_model.py on $(OUTPUT_DIR)$(DATA_FILE).split with the argument
# 0.2, after split-data has completed.
# NOTE(review): the .split input is presumably written by split_data.py and
# 0.2 is presumably the test fraction -- confirm in the scripts.
.PHONY: train-model
train-model: split-data
	python train_model.py $(OUTPUT_DIR)$(DATA_FILE).split 0.2
28
+
29
# Step 6: Evaluate the trained model using metrics (e.g. mean squared error).
# Runs evaluate_model.py on the model file under $(OUTPUT_DIR)$(MODEL_OUTPUT),
# after train-model has completed.
# No separator is added before model.pkl because MODEL_OUTPUT already ends
# in a slash.
# NOTE(review): model.pkl is presumably written by train_model.py -- confirm.
.PHONY: evaluate-model
evaluate-model: train-model
	python evaluate_model.py $(OUTPUT_DIR)$(MODEL_OUTPUT)model.pkl
32
+
33
# Default target (runs all steps).
# Make normally builds the first rule in the file when invoked without a
# goal; .DEFAULT_GOAL (GNU Make 3.81+) selects this target instead, so a bare
# `make` runs the whole pipeline even though this rule comes last.
.DEFAULT_GOAL := default
.PHONY: default
default: import-data clean-data eda split-data train-model evaluate-model
You can’t perform that action at this time.
0 commit comments