下面是一段用于将 np.array 中的异常值替换为中位数的代码。
def replace_outliers_with_median(
    err_arr,
    *,
    outlier_constant=1.5,
    lower_percentile=20,
    upper_percentile=80,
):
    """Replace outliers in an array with the median of the whole array.

    A value counts as an outlier when it lies outside the band::

        [p_low - margin, p_high + margin]

    where ``p_low`` / ``p_high`` are the ``lower_percentile`` /
    ``upper_percentile`` of the flattened input and
    ``margin = (p_high - p_low) * outlier_constant``.

    Note that the defaults use the 20th/80th percentiles, not the
    25th/75th quartiles of the classic IQR rule.

    Args:
        err_arr: Numeric array-like of any shape. The statistics are
            computed over all elements (no per-axis handling).
        outlier_constant: Multiplier applied to the percentile spread to
            obtain the tolerated margin on each side of the band.
        lower_percentile: Percentile (0-100) marking the lower edge of
            the band before the margin is applied.
        upper_percentile: Percentile (0-100) marking the upper edge of
            the band before the margin is applied.

    Returns:
        A new ``numpy.ndarray`` with the same shape as the input in which
        every out-of-band element is replaced by the median. The input is
        not modified.

    Raises:
        ValueError: If ``lower_percentile`` is greater than
            ``upper_percentile``.
    """
    if lower_percentile > upper_percentile:
        raise ValueError(
            "lower_percentile must not exceed upper_percentile"
        )

    values = np.asarray(err_arr)
    if values.size == 0:
        # Nothing to inspect; percentiles of an empty array are undefined.
        return values.copy()

    median = np.median(values)
    low, high = np.percentile(values, [lower_percentile, upper_percentile])
    margin = (high - low) * outlier_constant

    # Both bounds are inclusive: values exactly on the edge are kept.
    in_band = (values >= low - margin) & (values <= high + margin)
    return np.where(in_band, values, median)
例子:
# Sample 1 x 23 array containing two extreme readings
# (-11.46364 and 4.86889) among otherwise similar values.
arr = np.array([[
    -8.33717, -8.3755, -7.83968, -7.09376, -6.37511,
    -5.81576, -11.46364, -5.30386, -5.20346, -5.35983,
    -5.35344, -5.2447, -5.04924, -4.98142, -4.72909,
    4.86889, -4.95571, -4.93626, -5.17441, -5.18517,
    -5.30639, -5.36995, -8,
]])
# Despite the name, `df` holds a NumPy array, not a DataFrame.
df = replace_outliers_with_median(arr)
输出:
array([[-8.33717, -8.3755 , -7.83968, -7.09376, -6.37511, -5.81576,
-5.30639, -5.30386, -5.20346, -5.35983, -5.35344, -5.2447 ,
-5.04924, -4.98142, -4.72909, -5.30639, -4.95571, -4.93626,
-5.17441, -5.18517, -5.30639, -5.36995, -8. ]])
年龄 > 75
。 +1 - Ekaba Bisong