例如,我有以下数据。
现在,我想按品种(variety)分组,分别对 KN 和 GY 两列做标准化处理(即 z-score:减去组内均值后再除以组内标准差)。
简单来说,我的做法是:先用 subset() 按品种把数据拆开,再对每个子集的 KN 和 GY 分别调用 scale()(具体代码见下文)。
然后我把每个品种的结果逐一下载,再在 Excel 文件中按品种手动匹配。但我的实际数据有 30 多个品种,逐个手动匹配并不现实。我相信在 R 中一定有简单的方法,可以自动得到每个品种组内标准化后的数据。
你能告诉我如何做吗?
非常感谢!!
# Example data: 100 rows with a character column `variety` and two numeric
# columns, `KN` and `GY`. The variety labels come in four consecutive runs of
# 25 rows each (CV1, CV2, CV3, CV4), so they are generated with rep() instead
# of being spelled out one by one.
# The object carries the tibble class vector; it still behaves as a plain
# data.frame when the tibble package is not loaded.
dataA <- structure(
  list(
    variety = rep(c("CV1", "CV2", "CV3", "CV4"), each = 25L),
    KN = c(
      # rows 1-25
      3150, 2646, 3024, 3402, 3260.25,
      3276, 3150, 3685.5, 3260.25, 3402,
      4672, 4234, 4599, 4526, 4599,
      4526, 4234, 4088, 3504, 3942,
      3550, 3550, 3124, 4437.5, 3550,
      # rows 26-50
      4313.25, 4118, 4153.5, 4473, 3851.75,
      4260, 4952.25, 5680, 5112, 5431.5,
      3976, 5112, 4828, 4544, 5112,
      4632.75, 5271.75, 3976, 4792.5, 4544,
      4313.25, 4952.25, 4792.5, 4260, 4952.25,
      # rows 51-75
      3192.75, 2580, 2999.25, 2580, 3096,
      2902.5, 2805.75, 2332.75, 2999.25, 2666,
      3240, 3648, 3360, 3564, 3360,
      3360, 3552, 3648, 3456, 4320,
      3256, 3564, 2992, 3663, 3168,
      # rows 76-100
      3861, 3663, 3861, 3564, 3465,
      2886, 3042, 2886, 2886, 3412.5,
      2886, 3510, 3246.75, 3159, 3159,
      3384, 3760, 4018.5, 3572, 3912.75,
      4018.5, 4230, 3572, 3478, 3760
    ),
    GY = c(
      # rows 1-25
      9729.7744491255, 8562.20151523044, 9885.45084031151,
      10741.6709918346, 9729.7744491255, 10118.9654270905,
      9963.28903590451, 10975.1855786136, 10118.9654270905,
      10041.1272314975, 12266.3112039261, 11454.5700213133,
      12356.5046686608, 12627.0850628651, 13889.7935691516,
      11995.7308097218, 11364.3765565786, 12085.9242744566,
      11725.1504155176, 10552.6353739658, 10000.3548433298,
      12368.8599378026, 10438.9668978618, 11316.1910069258,
      10614.4117196746,
      # rows 26-50
      12632.0271705218, 13421.5288686794, 12456.582348709,
      12368.8599378026, 12105.6927050834, 17281.3149485611,
      17281.3149485611, 16404.090839497, 17719.9270030931,
      18158.5390576251, 15877.7563740586, 19825.2648648467,
      18333.9838794379, 17105.8701267483, 19123.4855775955,
      14035.5857450242, 15351.4219086202, 14474.1977995562,
      15702.3115522458, 15351.4219086202, 11930.2478832706,
      15702.3115522458, 13860.1409232114, 13947.8633341178,
      8859.96350154653,
      # rows 51-75
      8712.93579875975, 8075.40391104562, 8553.55282683122,
      8394.16985490268, 9137.95705723583, 9403.59534378339,
      8606.68048414073, 7969.1485964266, 8819.19111337877,
      8606.68048414073, 11268.005457273, 13106.4695055649,
      10437.7313709476, 10674.9525384691, 9844.67845214374,
      10971.478997871, 11623.8372085553, 11327.3107491533,
      10852.8684141103, 11149.3948735122, 11959.9005292108,
      14134.4278981582, 11851.1741607634, 13101.5273979082,
      12068.6268976581,
      # rows 76-100
      13373.3433190266, 12286.0796345529, 12394.8060030003,
      12449.1691872239, 13427.7065032503, 11516.3463670221,
      10745.3775725771, 11275.418618758, 11660.9030159805,
      11805.4596649389, 11660.9030159805, 12287.3151614671,
      12865.5417573008, 11564.5319166749, 12046.387413203,
      14691.650536451, 14807.7900663834, 15504.6272459779,
      14517.4412415523, 14923.9295963158, 14807.7900663834,
      14923.9295963158, 13936.7435918902, 14459.3714765861,
      14517.4412415523
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -100L)
)
现在,我想按品种(variety)分组,分别对 KN 和 GY 两列做标准化处理(即 z-score:减去组内均值后再除以组内标准差)。
简单来说,我的做法是:
# Manual per-variety standardisation: each variety is pulled into its own
# data frame, then KN and GY are z-scored within that subset. scale() centres
# and scales by default and returns a one-column matrix, so each new column is
# a matrix column. Only CV1-CV3 are handled here.

# Variety CV1
CV1 <- subset(dataA, variety == "CV1")
CV1$CV1_KN <- scale(CV1$KN)
CV1$CV1_GY <- scale(CV1$GY)

# Variety CV2
CV2 <- subset(dataA, variety == "CV2")
CV2$CV2_KN <- scale(CV2$KN)
CV2$CV2_GY <- scale(CV2$GY)

# Variety CV3
CV3 <- subset(dataA, variety == "CV3")
CV3$CV3_KN <- scale(CV3$KN)
CV3$CV3_GY <- scale(CV3$GY)
然后我把每个品种的结果逐一下载,再在 Excel 文件中按品种手动匹配。但我的实际数据有 30 多个品种,逐个手动匹配并不现实。我相信在 R 中一定有简单的方法,可以自动得到每个品种组内标准化后的数据。
你能告诉我如何做吗?
非常感谢!!