How to perform a biclustering analysis

  library(blockmodels4inventories)

This vignette describes how to apply a biclustering method to inventory data using the blockmodels4inventories package. This package wraps the blockmodels package and provides useful graphical outputs.

Create the matrix

  # Load the dfcrop example data set
  data(dfcrop)

  # Inspect the structure. str() prints its report itself and returns NULL,
  # so wrapping it in print() only adds a spurious "NULL" line to the output.
  str(dfcrop)
#> 'data.frame':    2659 obs. of  3 variables:
#>  $ ident: int  1 2 3 4 5 6 7 8 9 10 ...
#>  $ farm : chr  "FA-11" "FA-11" "FA-12" "FA-12" ...
#>  $ crops: Factor w/ 43 levels "Amaranthus","Arrow roots",..: 30 31 30 34 40 31 23 5 33 12 ...

  # First rows: one record per (farm, crop) observation
  print(head(dfcrop))
#>   ident  farm        crops
#> 1     1 FA-11 Pearl Millet
#> 2     2 FA-11   Pigeon pea
#> 3     3 FA-12 Pearl Millet
#> 4     4 FA-12      Sorghum
#> 5     5 FA-12      Tobacco
#> 6     6 FA-12   Pigeon pea
  
  # Number of farms
  print(length(unique(dfcrop$farm)))
#> [1] 400
  
  # Number of crops
  print(length(unique(dfcrop$crops)))
#> [1] 43
  
  # Build the matrix used by the analyses below: crops in rows, farms in columns
  test1 <- buildMatrix(data = dfcrop, var1 = "crops", var2 = "farm")
#> [1] "Warning: Each row of input data are not identified by a unique combination of keys."
#> [1] "The function had to suppress the duplicates!"

  # Preview the top-left 5 x 5 corner of the matrix
  print(test1[seq_len(5), seq_len(5)])
#>              
#>               FA-1 FA-10 FA-11 FA-12 FA-13
#>   Amaranthus     0     0     0     0     0
#>   Arrow roots    0     0     0     0     0
#>   Avocado        0     0     0     0     0
#>   Banana         0     0     0     0     0
#>   Beans          1     1     1     1     1
  
  # Plot the raw matrix (no clustering information supplied yet)
  plotMatrix(
    M = test1,
    df.row = NULL,
    df.col = NULL,
    var1 = "Especes",
    var2 = "Agriculteurs",
    model = "bernoulli",
    plotOpt = "incidence",
    degreeOrder = TRUE,
    axisX = TRUE,
    angleX = 45,
    axisY = TRUE,
    mytitle = NULL,
    sizextick = 0.5,
    sizeytick = 0.5,
    ticklength = 0.25,
    colxtick = "black",
    colytick = "black"
  )

Classical biclustering

Classical biclustering algorithms tend to create groups of rows (resp. columns) with similar degrees (the number of ones in each row, resp. column), which, for inventory data, leads to groups of common plants and groups of rare plants.

  # Classical biclustering of the crops-by-farms matrix.
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned.
  LBM_Classique <- biclusteringLBM(
    test1,
    model = "bernoulli",
    sortGroups = TRUE,
    verbosity = 0,
    ncores = 1
  )

Let’s have a look at the output object of a biclustering analysis:

  # Overall structure of the result: a list of three named elements
  str(LBM_Classique)
#> List of 3
#>  $ classif.row   :'data.frame':  43 obs. of  2 variables:
#>   ..$ mbrshp.row         : int [1:43] 6 6 6 5 2 6 6 6 6 5 ...
#>   ..$ posterior.proba.row: num [1:43] 0.988 0.988 0.988 0.988 0.988 ...
#>  $ classif.col   :'data.frame':  400 obs. of  2 variables:
#>   ..$ mbrshp.col         : int [1:400] 1 1 1 1 1 1 1 1 1 1 ...
#>   ..$ posterior.proba.col: num [1:400] 1 1 1 1 1 1 1 1 1 1 ...
#>  $ lbm.parameters:List of 2
#>   ..$ pi          : num [1:6, 1] 0.30935 0.88629 0.00509 0.12044 0.70307 ...
#>   ..$ n_parameters: num 6
#>  - attr(*, "class")= chr "biclustering"
  # Row (crop) classification: first element of the list
  head(LBM_Classique[["classif.row"]])
#>             mbrshp.row posterior.proba.row
#> Amaranthus           6           0.9884793
#> Arrow roots          6           0.9884793
#> Avocado              6           0.9884793
#> Banana               5           0.9884793
#> Beans                2           0.9884793
#> Biligan              6           0.9884793
  # Column (farm) classification: second element of the list
  head(LBM_Classique[["classif.col"]])
#>       mbrshp.col posterior.proba.col
#> FA-1           1                   1
#> FA-10          1                   1
#> FA-11          1                   1
#> FA-12          1                   1
#> FA-13          1                   1
#> FA-14          1                   1
  # Fitted model parameters: third element of the list
  head(LBM_Classique[["lbm.parameters"]])
#> $pi
#>             [,1]
#> [1,] 0.309347134
#> [2,] 0.886294273
#> [3,] 0.005094289
#> [4,] 0.120437325
#> [5,] 0.703072650
#> [6,] 0.041883539
#> 
#> $n_parameters
#> [1] 6

  # Plot the matrix with the classical biclustering result; elements 1 and 2
  # of LBM_Classique are the row and column classifications
  plotMatrix(
    M = test1,
    df.row = LBM_Classique[[1]],
    df.col = LBM_Classique[[2]],
    var1 = "Especes",
    var2 = "Agriculteurs",
    model = "bernoulli",
    plotOpt = "biclustering",
    degreeOrder = FALSE,
    axisX = TRUE,
    axisY = TRUE,
    angleX = 45,
    mytitle = NULL,
    sizextick = 0.5,
    sizeytick = 0.5,
    ticklength = 0.25,
    colxtick = "black",
    colytick = "black"
  )

Degree corrected biclustering

(Classical) biclustering algorithms tend to create groups of rows (resp. columns) with similar degrees (number of ones in the rows). For inventory data, this leads to groups of common plants and groups of rare plants.

However, grouping plants according to their overall abundance is sometimes irrelevant.

Intuitively, degree corrected biclustering methods aim at treating the overall abundance as a nuisance parameter. Although the procedure is quite different, one may think of renormalizing the rows (and columns) by their total number of ones and then grouping similar rows (and columns). For inventory data, degree corrected biclustering can group together a rare plant and a very common plant if they tend to be grown by the same groups of farms.


# Degree-corrected biclustering, with the correction applied to both rows
# and columns.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
LBM_Deg_Corr <- biclusteringLBM(
  test1,
  model = "bernoulli",
  degreeCorrection = TRUE,
  typeDegCorr = "both",
  sortGroups = TRUE,
  verbosity = 0,
  ncores = 1
)


# Plot the matrix with the degree-corrected biclustering result, passing the
# first two elements of LBM_Deg_Corr as df.row and df.col
plotMatrix(
  M = test1,
  df.row = LBM_Deg_Corr[[1]],
  df.col = LBM_Deg_Corr[[2]],
  var1 = "Especes",
  var2 = "Agriculteurs",
  model = "bernoulli",
  plotOpt = "biclustering",
  degreeOrder = FALSE,
  axisX = TRUE,
  axisY = TRUE,
  angleX = 45,
  mytitle = NULL,
  sizextick = 0.5,
  sizeytick = 0.5,
  ticklength = 0.25,
  colxtick = "black",
  colytick = "black"
)

Session information

#> R version 4.2.1 (2022-06-23 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19044)
#> 
#> Matrix products: default
#> 
#> locale:
#> [1] LC_COLLATE=French_France.utf8  LC_CTYPE=French_France.utf8   
#> [3] LC_MONETARY=French_France.utf8 LC_NUMERIC=C                  
#> [5] LC_TIME=French_France.utf8    
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] blockmodels4inventories_1.3.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_1.0.9               lattice_0.20-45          tidyr_1.2.0             
#>  [4] assertthat_0.2.1         rprojroot_2.0.3          digest_0.6.29           
#>  [7] utf8_1.2.2               ggforce_0.3.4            R6_2.5.1                
#> [10] evaluate_0.17            highr_0.9                ggfun_0.0.7             
#> [13] ggplot2_3.3.6            pillar_1.8.1             rlang_1.0.6             
#> [16] rstudioapi_0.13          jquerylib_0.1.4          Matrix_1.4-1            
#> [19] rmarkdown_2.17           pkgdown_2.0.6            textshaping_0.3.6       
#> [22] desc_1.4.1               stringr_1.4.1            htmlwidgets_1.5.4       
#> [25] polyclip_1.10-0          munsell_0.5.0            scatterpie_0.1.8        
#> [28] compiler_4.2.1           xfun_0.30                pkgconfig_2.0.3         
#> [31] systemfonts_1.0.4        htmltools_0.5.2          tidyselect_1.2.0        
#> [34] tibble_3.1.8             gridExtra_2.3            viridisLite_0.4.1       
#> [37] fansi_1.0.3              dplyr_1.0.9              withr_2.5.0             
#> [40] MASS_7.3-58.1            grid_4.2.1               jsonlite_1.8.2          
#> [43] gtable_0.3.1             lifecycle_1.0.3          DBI_1.1.3               
#> [46] magrittr_2.0.3           scales_1.2.1             cli_3.4.0               
#> [49] stringi_1.7.8            cachem_1.0.6             farver_2.1.1            
#> [52] viridis_0.6.2            fs_1.5.2                 sp_1.5-0                
#> [55] leaflet_2.1.1            bslib_0.4.0              ellipsis_0.3.2          
#> [58] ragg_1.2.2               generics_0.1.3           vctrs_0.4.1             
#> [61] RColorBrewer_1.1-3       tools_4.2.1              glue_1.6.2              
#> [64] tweenr_2.0.0             purrr_0.3.4              crosstalk_1.2.0         
#> [67] blockmodels_1.1.5        leaflet.minicharts_0.6.2 parallel_4.2.1          
#> [70] fastmap_1.1.0            yaml_2.3.5               colorspace_2.0-3        
#> [73] memoise_2.0.1            knitr_1.40               sass_0.4.2

N. Verzelen & I. Sanchez

2022-10-13

Create the matrix

Classical biclustering

Degree corrected biclustering

Session information