# 1. Import the dataset from the link above.
# 2. Rescaling: normalise the dataset using the MinMaxScaler class.
# 3. Standardizing data: transform each feature to have a mean of 0 and a standard deviation of 1 (the parameters of a standard Gaussian distribution).
from pandas import *
from numpy import *
from sklearn import preprocessing
import scipy.stats as s
# Load the dataset from "winequality-red.csv"; the relative path means the
# file must sit in the current working directory.
df = read_csv("winequality-red.csv")
print(df)

# --- Rescaling -------------------------------------------------------------
# MinMaxScaler maps every column linearly onto the requested range, here [0, 1].
print("Rescaling Data")
print("\n\n Data scaled between 0 and 1")
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled = data_scaler.fit_transform(df)
print("..........................")
print(data_scaled.round(2))

# --- Standardizing ---------------------------------------------------------
print("\n Standardizing Data ")
arr = array(df)
print(arr)

# NOTE: tmean() without an axis and std() without an axis both pool every
# value of the table, so these two figures describe the whole array rather
# than any individual column.
print("\n Initial mean : ", s.tmean(arr).round(2))
print("\n Initial Standard Deviation : ", round(arr.std(), 2))

# scale() standardizes along axis 0 by default, i.e. each column separately.
x_scaled = preprocessing.scale(arr)

# The per-column statistics were previously computed and silently discarded
# (a bare expression prints nothing outside an interactive session). Print
# them so the "mean 0 / std 1 per feature" result is actually visible.
print("\n Per-feature mean after scaling : ", x_scaled.mean(axis=0).round(2))
print("\n Per-feature standard deviation after scaling : ", x_scaled.std(axis=0).round(2))

print("\n Standardizing data : \n", x_scaled.round(2))
# Whole-array figures again (see the note above).
print("\n Scaled Mean : ", s.tmean(x_scaled).round(2))
print("\n Scaled Standard Deviation : ", round(x_scaled.std(), 2))
# Comments
# Post a Comment