Selection Analysis

Willyan Junior Adorian Bandeira

2025-03-20

#Procedures for genotype selection

The EstimateBreed package provides several useful functions for selecting superior genotypes.

##Heterosis and heterobeltiosis of hybrids

The calculation of heterosis and heterobeltiosis is crucial for hybrid selection, as it measures the hybrid’s performance relative to its parents. Heterosis reflects the overall vigor, while heterobeltiosis compares the hybrid to the best parent. These metrics help identify superior hybrids with desirable traits like higher productivity and better disease resistance.

These parameters can be easily calculated with the het() function.

library(EstimateBreed)

data("maize")
#Extract heterosis and heterobeltiosis
with(maize,het(GEN,GM,GP,PR,REP,param="all"))
#>     GEN Heterosis Heterobeltiosis
#> 1  HIB1  71.70025       51.199637
#> 2  HIB1  51.97650       37.288820
#> 3  HIB1  88.45976       87.525920
#> 4  HIB2 124.81676      100.825736
#> 5  HIB2 117.88532       87.407158
#> 6  HIB2 151.90865       95.751797
#> 7  HIB3  86.77350       50.399284
#> 8  HIB3  90.83780       53.375374
#> 9  HIB3  98.53781       57.174523
#> 10 HIB4  70.53690        1.978555
#> 11 HIB4  69.12610        2.696836
#> 12 HIB4  69.48340        2.121636

#Only extract heterosis
with(maize,het(GEN,GM,GP,PR,REP,param = "het"))
#>     GEN Heterosis
#> 1  HIB1  71.70025
#> 2  HIB1  51.97650
#> 3  HIB1  88.45976
#> 4  HIB2 124.81676
#> 5  HIB2 117.88532
#> 6  HIB2 151.90865
#> 7  HIB3  86.77350
#> 8  HIB3  90.83780
#> 9  HIB3  98.53781
#> 10 HIB4  70.53690
#> 11 HIB4  69.12610
#> 12 HIB4  69.48340

#Extract only heterobeltiosis
with(maize,het(GEN,GM,GP,PR,REP,param = "hetb"))
#>     GEN Heterobeltiosis
#> 1  HIB1       51.199637
#> 2  HIB1       37.288820
#> 3  HIB1       87.525920
#> 4  HIB2      100.825736
#> 5  HIB2       87.407158
#> 6  HIB2       95.751797
#> 7  HIB3       50.399284
#> 8  HIB3       53.375374
#> 9  HIB3       57.174523
#> 10 HIB4        1.978555
#> 11 HIB4        2.696836
#> 12 HIB4        2.121636

##Industrial quality indicators

Industrial quality indicators are very important for the selection of genotypes with industrial aptitude. They are widely applied to cereals (oats, wheat, barley, rye and corn).

Some interesting functions offered by the package are:

rend_ind(), to calculate the hulling index and industrial yield of white oats.

library(EstimateBreed)

data("aveia")
# Calculate the industrial yield without extracting the average
with(aveia, rend_ind(GEN,NG2M,MG,MC,RG))
#>    GEN        ID        RI
#> 1   G1 0.6421053  248.0992
#> 2   G1 0.7058824  259.2127
#> 3   G1 0.7263158  401.2875
#> 4   G1 0.6626506  142.5706
#> 5   G2 0.7636364  513.0272
#> 6   G2 0.7589286  473.7050
#> 7   G2 0.7882353  421.0346
#> 8   G2 0.7943925  735.9787
#> 9   G3 0.7659574  517.4221
#> 10  G3 0.7333333  198.2880
#> 11  G3 0.6808511  189.6073
#> 12  G3 0.7362637  257.6884
#> 13  G4 0.7844828 1309.5622
#> 14  G4 0.7551020  557.8128
#> 15  G4 0.7608696  608.3568
#> 16  G4 0.7530864  299.6963
#> 17  G5 0.7346939  698.7208
#> 18  G5 0.7583333  687.4180
#> 19  G5 0.6547619  144.2462
#> 20  G6 0.7083333  421.3280
#> 21  G6 0.7547170  376.3683
#> 22  G6 0.6822430  201.4227
#> 23  G6 0.6179775  241.5106
#> 24  G7 0.6893939  418.2682
#> 25  G7 0.7475728  409.8992
#> 26  G7 0.6818182  258.0423
#> 27  G8 0.6694915  779.4310
#> 28  G8 0.7117117  319.0226
#> 29  G8 0.6400000  436.5363
#> 30  G8 0.6666667  326.3160
#> 31  G9 0.7019231  679.3385
#> 32  G9 0.8080808  482.9684
#> 33  G9 0.7809524  685.0514
#> 34  G9 0.5595238  159.5463
#> 35 G10 0.7570093  920.7979
#> 36 G10 0.7578947  388.1068
#> 37 G10 0.7767857  617.4250
#> 38 G10 0.7578947  579.7885
#> 39 G11 0.7589286  536.0166
#> 40 G11 0.7943925  600.1238
#> 41 G11 0.7282609  281.4175
#> 42 G11 0.5384615  271.5605
#> 43 G12 0.7678571  694.0707
#> 44 G12 0.7631579  900.9143
#> 45 G12 0.7572816  665.8663
#> 46 G12 0.7478992  957.2217
#> 47 G13 0.6976744  503.7428
#> 48 G13 0.5543478  265.6668
#> 49 G13 0.7565217  575.4152
#> 50 G13 0.7636364  585.0671
#> 51 G14 0.6547619  374.1069
#> 52 G14 0.7325581  662.9329
#> 53 G14 0.5609756  330.6989
#> 54 G14 0.7222222  575.8133

# Calculate the industrial yield by extracting the average per genotype
with(aveia, rend_ind(GEN,NG2M,MG,MC,RG,stat="mean"))
#>    GEN        ID       RI
#> 1   G1 0.6842385 262.7925
#> 2  G10 0.7623961 626.5295
#> 3  G11 0.7050109 422.2796
#> 4  G12 0.7590489 804.5182
#> 5  G13 0.6930451 482.4730
#> 6  G14 0.6676295 485.8880
#> 7   G2 0.7762982 535.9364
#> 8   G3 0.7291014 290.7514
#> 9   G4 0.7633852 693.8570
#> 10  G5 0.7159297 510.1284
#> 11  G6 0.6908177 310.1574
#> 12  G7 0.7062616 362.0699
#> 13  G8 0.6719675 465.3265
#> 14  G9 0.7126200 501.7261

indviab(), to estimate viability parameters with traits measured in wheat crops.

library(EstimateBreed)

data("trigo")
#Ear viability index
with(trigo,indviab(TEST,NGE,NEE))
#>    Genotype     Index
#> 1        T1 0.7270983
#> 2        T2 0.7927928
#> 3        T3 0.7572954
#> 4        T4 0.7951413
#> 5        T5 0.7578163
#> 6        T6 0.7882816
#> 7        T7 0.7781609
#> 8        T8 0.7793054
#> 9        T9 0.8141160
#> 10      T10 0.8498195
#> 11      T11 0.8438819
#> 12      T12 0.8014337
#> 13      T13 0.7719665
#> 14      T14 0.8245132
#> 15      T15 0.8051075
#> 16      T16 0.7022956
#> 17      T17 0.7624164
#> 18      T18 0.7773723
#> 19      T19 0.7438854

#Ear harvest index
with(trigo,indviab(TEST,MGE,ME))
#>    Genotype     Index
#> 1        T1 0.7530794
#> 2        T2 0.7211403
#> 3        T3 0.7250430
#> 4        T4 0.7276371
#> 5        T5 0.7527310
#> 6        T6 0.7404977
#> 7        T7 0.7513300
#> 8        T8 0.7555921
#> 9        T9 0.7464156
#> 10      T10 0.7605856
#> 11      T11 0.7486986
#> 12      T12 0.7393468
#> 13      T13 0.7484841
#> 14      T14 0.7646920
#> 15      T15 0.7515557
#> 16      T16 0.6933280
#> 17      T17 0.7543860
#> 18      T18 0.7340161
#> 19      T19 0.7276199

#Spikelet deposition index in the ear
with(trigo,indviab(TEST,NEE,CE))
#>    Genotype    Index
#> 1        T1 6.682692
#> 2        T2 6.148688
#> 3        T3 6.207658
#> 4        T4 6.286344
#> 5        T5 6.517241
#> 6        T6 6.135952
#> 7        T7 6.561086
#> 8        T8 6.169643
#> 9        T9 6.276316
#> 10      T10 6.276435
#> 11      T11 6.301329
#> 12      T12 6.065217
#> 13      T13 6.298682
#> 14      T14 6.298701
#> 15      T15 6.514886
#> 16      T16 5.824513
#> 17      T17 6.493023
#> 18      T18 6.061947
#> 19      T19 6.470234

hw(), which applies a linear model to obtain the hectoliter weight of wheat, oat, barley and rye crops.

library(EstimateBreed)

GEN <- rep(paste("G", 1:5, sep=""), each = 3)
REP <- rep(1:3, times = 5)
MG <- c(78.5, 80.2, 79.1, 81.3, 82.0, 80.8, 76.9, 78.1, 77.5, 83.2,
84.1, 82.9, 77.4, 78.9, 79.3)

data <- data.frame(GEN, REP, MG)

with(data,hw(GEN,MG,crop="trit"))
#>    GEN   HL       HW
#> 1   G1 78.5 25.53219
#> 2   G1 80.2 26.30029
#> 3   G1 79.1 25.80328
#> 4   G2 81.3 26.79729
#> 5   G2 82.0 27.11356
#> 6   G2 80.8 26.57138
#> 7   G3 76.9 24.80928
#> 8   G3 78.1 25.35146
#> 9   G3 77.5 25.08037
#> 10  G4 83.2 27.65575
#> 11  G4 84.1 28.06239
#> 12  G4 82.9 27.52020
#> 13  G5 77.4 25.03519
#> 14  G5 78.9 25.71292
#> 15  G5 79.3 25.89365

#Extract the average PH per genotype
with(data,hw(GEN,MG,crop="trit",stat="mean"))
#>   GEN       HW
#> 1  G1 25.87859
#> 2  G2 26.82741
#> 3  G3 25.08037
#> 4  G4 27.74611
#> 5  G5 25.54725

##ISGR

Obtain the genetic selection index for resilience (isgr()) for selecting genotypes for environmental stressors, as described by Bandeira et al. (2024).

library(EstimateBreed)

#Obtain environmental deviations
data("desvamb")
head(desvamb)
#> # A tibble: 6 × 3
#>   ENV    TMED  PREC
#>   <chr> <dbl> <dbl>
#> 1 E1     22.1  0   
#> 2 E1     22.7  0   
#> 3 E1     24.5  0.03
#> 4 E1     22.3  4.49
#> 5 E1     21.7  3.91
#> 6 E1     19.6  0.04

#Use DPclim for the ISGR function to identify deviations correctly
DPclim <- with(desvamb,desv_clim(ENV,TMED,PREC))

#Calculate the ISGR
data("genot")
head(genot)
#> # A tibble: 6 × 5
#>   GEN   ENV      NG    MG CICLO
#>   <chr> <chr> <dbl> <dbl> <dbl>
#> 1 L445  E1    1017.  142.   146
#> 2 L162  E2     847.  140.   150
#> 3 L277  E2     789.  131.   150
#> 4 L155  E2     735.  122.   150
#> 5 L166  E2     695.  117.   150
#> 6 L26   E2     664.  113.   150
isgr_index <- with(genot, isgr(GEN,ENV,NG,MG,CICLO))

#Define the water requirement per stage
isgr_index <- with(genot, isgr(GEN,ENV,NG,MG,CICLO,req=5,stage="rep"))

##Restriction of controls variability

It restricts the variability of witnesses for less biased genetic parameter estimates, as described by Carvalho et al. (2023). This can be accomplished with the restr() function.

library(EstimateBreed)

TEST <- rep(paste("T", 1:5, sep=""), each=3)
REP <- rep(1:3, times=5)
Xi <- rnorm(15, mean=10, sd=2)

data <- data.frame(TEST,REP,Xi)

#Apply the witness variability constraint
Control <- with(data, restr(TEST,REP,Xi,scenario = "restr",zstat = FALSE))

#Apply witness variability restriction with normalization (Z statistic)
Control <- with(data, restr(TEST,REP,Xi,scenario = "restr",zstat = TRUE))

The inbreeding coefficient and genetic parameters are key for evaluating genetic diversity and optimizing breeding strategies. The inbreeding coefficient indicates genetic relatedness, while genetic parameters guide the selection of superior genotypes, improving traits and maintaining long-term sustainability.

The inbreeding coefficient can be obtained with the COI() function.

library(EstimateBreed)

var <- c("A","B","C","D","E")
VF <- c(2.5, 3.0, 2.8, 3.2, 2.7)
VG <- c(1.2, 1.5, 1.3, 1.6, 1.4)
data <- data.frame(var,VG,VF)

#Calculating for just one generation
with(data,COI(var,VG,VF,generation = "F3"))
#> $Variances
#>      Generation var  VA_Total VD_Total VA_Between VD_Between VA_Within
#> F3.1         F3   A 0.8000000 1.600000        1.2        4.8       2.4
#> F3.2         F3   B 1.0000000 2.000000        1.5        6.0       3.0
#> F3.3         F3   C 0.8666667 1.733333        1.3        5.2       2.6
#> F3.4         F3   D 1.0666667 2.133333        1.6        6.4       3.2
#> F3.5         F3   E 0.9333333 1.866667        1.4        5.6       2.8
#>      VD_Within
#> F3.1       2.4
#> F3.2       3.0
#> F3.3       2.6
#> F3.4       3.2
#> F3.5       2.8
#> 
#> $Heritabilities
#>      Generation var      hVaT      hVdT      hVaE     hVdE      hVaD      hVdD
#> F3.1         F3   A 0.3200000 0.6400000 0.4800000 1.920000 0.9600000 0.9600000
#> F3.2         F3   B 0.3333333 0.6666667 0.5000000 2.000000 1.0000000 1.0000000
#> F3.3         F3   C 0.3095238 0.6190476 0.4642857 1.857143 0.9285714 0.9285714
#> F3.4         F3   D 0.3333333 0.6666667 0.5000000 2.000000 1.0000000 1.0000000
#> F3.5         F3   E 0.3456790 0.6913580 0.5185185 2.074074 1.0370370 1.0370370

Genetic parameters for balanced experiments can be estimated from the genpar() function. The methodology was described by Yadav et al. (2024).

library(EstimateBreed)
data("genot2")

#Geting parameters without cheking model assumptions
parameters <- genpar(genot2,Gen,Rep,var =c("VAR1", "VAR2"))
#> 
#> ==================================================
#> Analyzing Variable: VAR1 
#> ==================================================
#> 
#> Genotypic effect was significant for variable: VAR1 
#> 
#> ==================================================
#> Analyzing Variable: VAR2 
#> ==================================================
#> 
#> Genotypic effect was NOT significant for variable: VAR2
parameters$anova
#> $VAR1
#>             Df Sum Sq Mean Sq  F value Pr(>F)    
#> GEN         19 8972.7  472.25 120.9202 <2e-16 ***
#> REP          2    1.4    0.71   0.1828 0.8337    
#> Residuals   38  148.4    3.91                    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> $VAR2
#>             Df Sum Sq Mean Sq F value Pr(>F)
#> GEN         19 40.282  2.1201  1.5122 0.1365
#> REP          2  0.931  0.4655  0.3320 0.7195
#> Residuals   38 53.276  1.4020
parameters$gp
#> $VAR1
#>   Parameter        VAR1
#> 1    sigmaE   3.9054433
#> 2    sigmaG 156.1138901
#> 3    sigmaP 160.0193334
#> 4       ECV   6.5587072
#> 5       GCV  41.4671174
#> 6       PCV  41.9825972
#> 7        H2   0.9755939
#> 8        GA   0.6247277
#> 9       GAM   2.0733559

#Checking model assumptions
parameters <- genpar(genot2,Gen,Rep,var =c("VAR1", "VAR2"),check=TRUE)
#> 
#> ==================================================
#> Analyzing Variable: VAR1 
#> ==================================================
#> 
#> Performing assumption tests...
#> Shapiro-Wilk Normality Test: p-value = 0.22358 
#> Bartlett Homogeneity Test: p-value = 0.12747 
#> Levene Homogeneity Test: p-value = 0.72326 
#> Breusch-Pagan Heteroscedasticity Test: p-value = 0.05089 
#> 
#> --------------------------------------------------
#> 
#> Genotypic effect was significant for variable: VAR1 
#> 
#> ==================================================
#> Analyzing Variable: VAR2 
#> ==================================================
#> 
#> Performing assumption tests...
#> Shapiro-Wilk Normality Test: p-value = 0.48831 
#> Bartlett Homogeneity Test: p-value = 0.60586 
#> Levene Homogeneity Test: p-value = 0.99062 
#> Breusch-Pagan Heteroscedasticity Test: p-value = 0.06705 
#> 
#> --------------------------------------------------
#> 
#> Genotypic effect was NOT significant for variable: VAR2
parameters$anova
#> $VAR1
#>             Df Sum Sq Mean Sq  F value Pr(>F)    
#> GEN         19 8972.7  472.25 120.9202 <2e-16 ***
#> REP          2    1.4    0.71   0.1828 0.8337    
#> Residuals   38  148.4    3.91                    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> $VAR2
#>             Df Sum Sq Mean Sq F value Pr(>F)
#> GEN         19 40.282  2.1201  1.5122 0.1365
#> REP          2  0.931  0.4655  0.3320 0.7195
#> Residuals   38 53.276  1.4020
parameters$gp
#> $VAR1
#>   Parameter        VAR1
#> 1    sigmaE   3.9054433
#> 2    sigmaG 156.1138901
#> 3    sigmaP 160.0193334
#> 4       ECV   6.5587072
#> 5       GCV  41.4671174
#> 6       PCV  41.9825972
#> 7        H2   0.9755939
#> 8        GA   0.6247277
#> 9       GAM   2.0733559