The File_Operation class handles model serialization, deserialization, and file management operations. It saves trained models to disk and loads them for prediction, supporting the multi-model architecture where different models are trained for different data clusters.
Directory Structure: Models are saved with the following structure:
models/
└── {filename}/
    └── {filename}.sav
Example Usage:
from file_operations.file_methods import File_Operationfrom sklearn.svm import SVC# Train a modelmodel = SVC(kernel='rbf', C=1.0)model.fit(X_train, Y_train)# Save the modelfile_op = File_Operation(file_object, logger_object)result = file_op.save_model(model, 'SVM_Cluster_0')if result == 'success': print("Model saved successfully")
Implementation:
self.logger_object.log( self.file_object, 'Entered the save_model method of the File_Operation class')try: # Create directory path path = os.path.join(self.model_directory, filename) # Remove existing model if present if os.path.isdir(path): shutil.rmtree(self.model_directory) os.makedirs(path) else: os.makedirs(path) # Save model using pickle with open(path + '/' + filename + '.sav', 'wb') as f: pickle.dump(model, f) self.logger_object.log( self.file_object, 'Model File ' + filename + ' saved. Exited the save_model method of the Model_Finder class' ) return 'success'except Exception as e: self.logger_object.log( self.file_object, 'Exception occured in save_model method of the Model_Finder class. Exception message: ' + str(e) ) self.logger_object.log( self.file_object, 'Model File ' + filename + ' could not be saved. Exited the save_model method of the Model_Finder class' ) raise Exception()
Overwrite Behavior:
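If a directory for the same filename already exists, save_model() calls shutil.rmtree() on the entire model directory (models/), not just on that model's subfolder, and then recreates only the subfolder for the model being saved. This prevents a stale copy of the model from persisting, but it also deletes every other model previously saved under models/. Re-save or back up those models before overwriting an existing one.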
from file_operations.file_methods import File_Operation# Load a saved modelfile_op = File_Operation(file_object, logger_object)loaded_model = file_op.load_model('SVM_Cluster_0')# Use for predictionspredictions = loaded_model.predict(X_new)print(f"Predictions: {predictions}")
Implementation:
self.logger_object.log( self.file_object, 'Entered the load_model method of the File_Operation class')try: with open( self.model_directory + filename + '/' + filename + '.sav', 'rb' ) as f: self.logger_object.log( self.file_object, 'Model File ' + filename + ' loaded. Exited the load_model method of the Model_Finder class' ) return pickle.load(f)except Exception as e: self.logger_object.log( self.file_object, 'Exception occured in load_model method of the Model_Finder class. Exception message: ' + str(e) ) self.logger_object.log( self.file_object, 'Model File ' + filename + ' could not be saved. Exited the load_model method of the Model_Finder class' ) raise Exception()
Expected File Path:
models/{filename}/{filename}.sav
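The method expects the exact directory structure created by save_model(). Unlike save_model(), which builds the directory path with os.path.join(), load_model() builds the path by plain string concatenation (model_directory + filename + '/' + filename + '.sav'), so the configured model directory must end with a path separator (for example, models/).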
find_correct_model_file() returns the model filename (without extension) whose name contains the given cluster number.
Example Usage:
from file_operations.file_methods import File_Operation# Determine which model to use for cluster 2file_op = File_Operation(file_object, logger_object)model_name = file_op.find_correct_model_file(cluster_number=2)print(f"Model for cluster 2: {model_name}")# Output: Model for cluster 2: XGBoost2# Load the identified modelmodel = file_op.load_model(model_name)
Implementation:
self.logger_object.log( self.file_object, 'Entered the find_correct_model_file method of the File_Operation class')try: self.cluster_number = cluster_number self.folder_name = self.model_directory self.list_of_model_files = [] self.list_of_files = os.listdir(self.folder_name) for self.file in self.list_of_files: try: if (self.file.index(str(self.cluster_number)) != -1): self.model_name = self.file except: continue self.model_name = self.model_name.split('.')[0] self.logger_object.log( self.file_object, 'Exited the find_correct_model_file method of the Model_Finder class.' ) return self.model_nameexcept Exception as e: self.logger_object.log( self.file_object, 'Exception occured in find_correct_model_file method of the Model_Finder class. Exception message: ' + str(e) ) self.logger_object.log( self.file_object, 'Exited the find_correct_model_file method of the Model_Finder class with Failure' ) raise Exception()
Naming Convention: Model files should include the cluster number in their name:
XGBoost0 - XGBoost model for cluster 0
SVM1 - SVM model for cluster 1
XGBoost2 - XGBoost model for cluster 2
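Use descriptive filenames like ModelName{ClusterNumber} for easy identification. Note that the lookup is a plain substring match on the cluster number, and the last matching entry found in the model directory wins, so avoid names in which another cluster's number appears as a substring (for example, a lookup for cluster 1 also matches XGBoost10).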
from file_operations.file_methods import File_Operationfrom best_model_finder.tuner import Model_Finderfrom data_preprocessing.clustering import KMeansClustering# Initialize objectsfile_op = File_Operation(file_object, logger_object)model_finder = Model_Finder(file_object, logger_object)kmeans = KMeansClustering(file_object, logger_object)# Create clustersoptimal_clusters = kmeans.elbow_plot(X_train)data_with_clusters = kmeans.create_clusters(X_train, optimal_clusters)# Train and save model for each clusterfor cluster_num in data_with_clusters['Cluster'].unique(): # Get cluster data cluster_data = data_with_clusters[ data_with_clusters['Cluster'] == cluster_num ] cluster_features = cluster_data.drop(['Cluster'], axis=1) cluster_labels = Y_train[cluster_data.index] # Find best model for this cluster model_name, model = model_finder.get_best_model( cluster_features, cluster_labels, X_test, Y_test ) # Save the model filename = f"{model_name}{cluster_num}" file_op.save_model(model, filename) print(f"Saved {filename} for cluster {cluster_num}")
from file_operations.file_methods import File_Operationfrom data_preprocessing.clustering import KMeansClustering# Initialize objectsfile_op = File_Operation(file_object, logger_object)# Load KMeans model to assign clusterskmeans_model = file_op.load_model('KMeans')# Predict clusters for new dataclusters = kmeans_model.predict(X_new)# Make predictions for each clusterpredictions = []for i, cluster_num in enumerate(clusters): # Find the right model for this cluster model_name = file_op.find_correct_model_file(cluster_num) # Load the model model = file_op.load_model(model_name) # Make prediction pred = model.predict([X_new[i]]) predictions.append(pred[0])print(f"Predictions: {predictions}")