Code for the tutorial "Imbalance Learning with the Imblearn and Smote-Variants Libraries in Python"


View on GitHub

imb_learning.py

import numpy as np 
import pandas as pd 
from imblearn.under_sampling import RandomUnderSampler,EditedNearestNeighbours,InstanceHardnessThreshold,TomekLinks
from imblearn.over_sampling import RandomOverSampler 
import smote_variants as sv


# Load the dataset from "creditcard.csv" in the current working directory.
df = pd.read_csv("creditcard.csv")

# The "Class" column holds the labels; the features are all remaining
# columns except "Time", which is dropped together with the label column.
y = df["Class"]
X = df.drop(columns=["Time", "Class"])

# Print how many rows carry each label value.
print(y.value_counts())


# Random under-sampling with the sampler's default settings.
under = RandomUnderSampler()
X_und, y_und = under.fit_resample(X, y)

# Check that both classes (labels 1 and 0) now have the same number of rows.
# The comparison has to be made on the labels: masking the feature frame
# (X_und[X_und == 1]) keeps the frame's shape, so its length never changes
# and the check would print True regardless of the class balance.
print(len(y_und[y_und == 1]) == len(y_und[y_und == 0]))


# Random over-sampling with the sampler's default settings.
# NOTE: the result is stored under the names X_und / y_und, as in the
# original script, even though the data here is over-sampled.
over = RandomOverSampler()
X_und, y_und = over.fit_resample(X, y)

# Check that both classes (labels 1 and 0) now have the same number of rows.
# The comparison has to be made on the labels: masking the feature frame
# (X_und[X_und == 1]) keeps the frame's shape, so its length never changes
# and the check would print True regardless of the class balance.
print(len(y_und[y_und == 1]) == len(y_und[y_und == 0]))


# Under-samplers from imblearn, each created with its default settings.
under_samp_models = [
    EditedNearestNeighbours(),
    InstanceHardnessThreshold(),
    TomekLinks(),
]

# Resample the full dataset with every sampler in turn. Each iteration
# overwrites X_und / y_und, so only the last sampler's output remains
# bound after the loop.
for under_samp_model in under_samp_models:
    X_und, y_und = under_samp_model.fit_resample(X, y)
    
# Over-samplers from the smote_variants package, each with default settings.
svs = [sv.kmeans_SMOTE(), sv.Safe_Level_SMOTE(), sv.SMOTE_Cosine()]

# smote_variants documents `sample` as taking NumPy arrays, while X and y are
# pandas objects here. Convert them once, outside the loop, so the samplers
# can use positional indexing on the data.
X_arr = np.asarray(X)
y_arr = np.asarray(y)

# Run every over-sampler on the full dataset. Each iteration overwrites
# X_over_samp / y_over_samp, so only the last sampler's output remains bound.
for over_sampler in svs:
    X_over_samp, y_over_samp = over_sampler.sample(X_arr, y_arr)