Skip to contents

sigstats enables common mathematical operations / transformations to be applied to sigverse style signatures / catalogues

Installation

You can install the development version of sigstats like so:

if (!require("pak", quietly = TRUE))
    install.packages("pak")

pak::pak("selkamand/sigstats")

Quick Start

library(sigstats)
library(sigstash)
library(sigvis)

# Load a signature collection
signatures <- sig_load("COSMIC_v3.3.1_SBS_GRCh38")

# Create a model that represents a mix of SBS2 (40%) and SBS13 (60%)
model <- c(SBS2 = 0.4, SBS13 = 0.6)

# Add selected signatures to the combined model
combined_signatures <- sig_combine(signatures, model)

# Visualise result
sig_visualise(
  combined_signatures, 
  title = "Model",
  subtitle = "Created by combining SBS2 (40%) and SBS13 (60%)"
  )
#> ✔ All channels matched perfectly to set [sbs_96]. Using this set for sort order
#> ✔ All types matched perfectly to set [sbs_type]. Using this set for sort order
#> ✔ Types matched perfectly to palette [snv_type]

Signature Operations

sigstats helps you add and subtract catalogues/signatures

# Load a signature collection
signatures <- sig_load("COSMIC_v3.3.1_SBS_GRCh38")

# Reconstruct catalogues for two pure samples (each with 100 mutations)
catalogue1 <- sig_reconstruct(signatures[['SBS3']], n = 100)
catalogue2 <- sig_reconstruct(signatures[['SBS4']], n = 100)
catalogue3 <- sig_reconstruct(signatures[['SBS5']], n = 100)

# Subtract catalogue2 from catalogue1
difference <- catalogue1 %-% catalogue2

# Inspect result
head(difference)
#>     type channel    fraction      count
#> 193  C>A A[C>A]A 0.022382961 -2.1530987
#> 194  C>A A[C>A]C 0.017305082 -1.6646390
#> 195  C>A A[C>A]G 0.014874591 -1.4308412
#> 196  C>A A[C>A]T 0.018086125 -1.7397704
#> 197  C>G A[C>G]A 0.013392234  1.2882479
#> 198  C>G A[C>G]C 0.009122518  0.8775283

# Sum three catalogues
catalogue1 %+% catalogue2 %+% catalogue3
#>     type channel    fraction      count
#> 193  C>A A[C>A]A 0.025140845  7.5422534
#> 194  C>A A[C>A]C 0.019556620  5.8669861
#> 195  C>A A[C>A]G 0.006610550  1.9831650
#> 196  C>A A[C>A]T 0.016187902  4.8563707
#> 197  C>G A[C>G]A 0.012296054  3.6888161
#> 198  C>G A[C>G]C 0.006680065  2.0040195
#> 199  C>G A[C>G]G 0.001119236  0.3357709
#> 200  C>G A[C>G]T 0.010400639  3.1201918
#> 201  C>T A[C>T]A 0.018591614  5.5774841
#> 202  C>T A[C>T]C 0.011376016  3.4128049
#> 203  C>T A[C>T]G 0.003279210  0.9837630
#> 204  C>T A[C>T]T 0.012841364  3.8524091
#> 205  T>A A[T>A]A 0.007402060  2.2206179
#> 206  T>A A[T>A]C 0.005911249  1.7733748
#> 207  T>A A[T>A]G 0.010351356  3.1054067
#> 208  T>A A[T>A]T 0.005490303  1.6470908
#> 209  T>C A[T>C]A 0.023449410  7.0348230
#> 210  T>C A[T>C]C 0.007315471  2.1946412
#> 211  T>C A[T>C]G 0.018425765  5.5277296
#> 212  T>C A[T>C]T 0.018988012  5.6964035
#> 213  T>G A[T>G]A 0.002941629  0.8824886
#> 214  T>G A[T>G]C 0.001792487  0.5377460
#> 215  T>G A[T>G]G 0.005200386  1.5601158
#> 216  T>G A[T>G]T 0.002995597  0.8986790
#> 217  C>A C[C>A]A 0.036674366 11.0023099
#> 218  C>A C[C>A]C 0.036596975 10.9790925
#> 219  C>A C[C>A]G 0.010193282  3.0579845
#> 220  C>A C[C>A]T 0.030131351  9.0394053
#> 221  C>G C[C>G]A 0.011297774  3.3893323
#> 222  C>G C[C>G]C 0.009887456  2.9662367
#> 223  C>G C[C>G]G 0.002471278  0.7413834
#> 224  C>G C[C>G]T 0.012808497  3.8425491
#> 225  C>T C[C>T]A 0.015969598  4.7908794
#> 226  C>T C[C>T]C 0.016685828  5.0057485
#> 227  C>T C[C>T]G 0.007703069  2.3109208
#> 228  C>T C[C>T]T 0.020576421  6.1729262
#> 229  T>A C[T>A]A 0.008624313  2.5872938
#> 230  T>A C[T>A]C 0.011761654  3.5284962
#> 231  T>A C[T>A]G 0.016900186  5.0700558
#> 232  T>A C[T>A]T 0.009659740  2.8979221
#> 233  T>C C[T>C]A 0.009095737  2.7287211
#> 234  T>C C[T>C]C 0.009107742  2.7323227
#> 235  T>C C[T>C]G 0.012121890  3.6365670
#> 236  T>C C[T>C]T 0.011150273  3.3450820
#> 237  T>G C[T>G]A 0.002608360  0.7825080
#> 238  T>G C[T>G]C 0.004039645  1.2118936
#> 239  T>G C[T>G]G 0.006869552  2.0608655
#> 240  T>G C[T>G]T 0.005206204  1.5618613
#> 241  C>A G[C>A]A 0.016338672  4.9016016
#> 242  C>A G[C>A]C 0.017571383  5.2714149
#> 243  C>A G[C>A]G 0.006419293  1.9257880
#> 244  C>A G[C>A]T 0.012631887  3.7895661
#> 245  C>G G[C>G]A 0.007569400  2.2708199
#> 246  C>G G[C>G]C 0.006407871  1.9223614
#> 247  C>G G[C>G]G 0.002433293  0.7299880
#> 248  C>G G[C>G]T 0.008124929  2.4374787
#> 249  C>T G[C>T]A 0.013313488  3.9940464
#> 250  C>T G[C>T]C 0.012987867  3.8963602
#> 251  C>T G[C>T]G 0.004474679  1.3424036
#> 252  C>T G[C>T]T 0.011193117  3.3579350
#> 253  T>A G[T>A]A 0.006609142  1.9827425
#> 254  T>A G[T>A]C 0.004644861  1.3934582
#> 255  T>A G[T>A]G 0.009645684  2.8937051
#> 256  T>A G[T>A]T 0.007112597  2.1337790
#> 257  T>C G[T>C]A 0.008933222  2.6799666
#> 258  T>C G[T>C]C 0.004898068  1.4694205
#> 259  T>C G[T>C]G 0.009459062  2.8377186
#> 260  T>C G[T>C]T 0.008329559  2.4988678
#> 261  T>G G[T>G]A 0.002496523  0.7489569
#> 262  T>G G[T>G]C 0.001428218  0.4284653
#> 263  T>G G[T>G]G 0.006235766  1.8707298
#> 264  T>G G[T>G]T 0.002849820  0.8549461
#> 265  C>A T[C>A]A 0.018798045  5.6394135
#> 266  C>A T[C>A]C 0.024386104  7.3158311
#> 267  C>A T[C>A]G 0.004973720  1.4921161
#> 268  C>A T[C>A]T 0.021536377  6.4609132
#> 269  C>G T[C>G]A 0.008274661  2.4823983
#> 270  C>G T[C>G]C 0.011325426  3.3976278
#> 271  C>G T[C>G]G 0.001381567  0.4144701
#> 272  C>G T[C>G]T 0.013254230  3.9762690
#> 273  C>T T[C>T]A 0.009982055  2.9946166
#> 274  C>T T[C>T]C 0.014477519  4.3432556
#> 275  C>T T[C>T]G 0.005838103  1.7514308
#> 276  C>T T[C>T]T 0.012243639  3.6730918
#> 277  T>A T[T>A]A 0.007142323  2.1426968
#> 278  T>A T[T>A]C 0.006436888  1.9310663
#> 279  T>A T[T>A]G 0.007066453  2.1199359
#> 280  T>A T[T>A]T 0.009156982  2.7470946
#> 281  T>C T[T>C]A 0.011716363  3.5149088
#> 282  T>C T[T>C]C 0.006519940  1.9559820
#> 283  T>C T[T>C]G 0.007173081  2.1519242
#> 284  T>C T[T>C]T 0.011048486  3.3145458
#> 285  T>G T[T>G]A 0.004227468  1.2682404
#> 286  T>G T[T>G]C 0.004418606  1.3255818
#> 287  T>G T[T>G]G 0.005811939  1.7435818
#> 288  T>G T[T>G]T 0.008216595  2.4649784

# Sum a catalogue collection
catalogues <- list(cat1 = catalogue1, cat2 = catalogue2, cat3 = catalogue3)
sig_sum(catalogues)
#>     type channel    fraction      count
#> 193  C>A A[C>A]A 0.025140845  7.5422534
#> 194  C>A A[C>A]C 0.019556620  5.8669861
#> 195  C>A A[C>A]G 0.006610550  1.9831650
#> 196  C>A A[C>A]T 0.016187902  4.8563707
#> 197  C>G A[C>G]A 0.012296054  3.6888161
#> 198  C>G A[C>G]C 0.006680065  2.0040195
#> 199  C>G A[C>G]G 0.001119236  0.3357709
#> 200  C>G A[C>G]T 0.010400639  3.1201918
#> 201  C>T A[C>T]A 0.018591614  5.5774841
#> 202  C>T A[C>T]C 0.011376016  3.4128049
#> 203  C>T A[C>T]G 0.003279210  0.9837630
#> 204  C>T A[C>T]T 0.012841364  3.8524091
#> 205  T>A A[T>A]A 0.007402060  2.2206179
#> 206  T>A A[T>A]C 0.005911249  1.7733748
#> 207  T>A A[T>A]G 0.010351356  3.1054067
#> 208  T>A A[T>A]T 0.005490303  1.6470908
#> 209  T>C A[T>C]A 0.023449410  7.0348230
#> 210  T>C A[T>C]C 0.007315471  2.1946412
#> 211  T>C A[T>C]G 0.018425765  5.5277296
#> 212  T>C A[T>C]T 0.018988012  5.6964035
#> 213  T>G A[T>G]A 0.002941629  0.8824886
#> 214  T>G A[T>G]C 0.001792487  0.5377460
#> 215  T>G A[T>G]G 0.005200386  1.5601158
#> 216  T>G A[T>G]T 0.002995597  0.8986790
#> 217  C>A C[C>A]A 0.036674366 11.0023099
#> 218  C>A C[C>A]C 0.036596975 10.9790925
#> 219  C>A C[C>A]G 0.010193282  3.0579845
#> 220  C>A C[C>A]T 0.030131351  9.0394053
#> 221  C>G C[C>G]A 0.011297774  3.3893323
#> 222  C>G C[C>G]C 0.009887456  2.9662367
#> 223  C>G C[C>G]G 0.002471278  0.7413834
#> 224  C>G C[C>G]T 0.012808497  3.8425491
#> 225  C>T C[C>T]A 0.015969598  4.7908794
#> 226  C>T C[C>T]C 0.016685828  5.0057485
#> 227  C>T C[C>T]G 0.007703069  2.3109208
#> 228  C>T C[C>T]T 0.020576421  6.1729262
#> 229  T>A C[T>A]A 0.008624313  2.5872938
#> 230  T>A C[T>A]C 0.011761654  3.5284962
#> 231  T>A C[T>A]G 0.016900186  5.0700558
#> 232  T>A C[T>A]T 0.009659740  2.8979221
#> 233  T>C C[T>C]A 0.009095737  2.7287211
#> 234  T>C C[T>C]C 0.009107742  2.7323227
#> 235  T>C C[T>C]G 0.012121890  3.6365670
#> 236  T>C C[T>C]T 0.011150273  3.3450820
#> 237  T>G C[T>G]A 0.002608360  0.7825080
#> 238  T>G C[T>G]C 0.004039645  1.2118936
#> 239  T>G C[T>G]G 0.006869552  2.0608655
#> 240  T>G C[T>G]T 0.005206204  1.5618613
#> 241  C>A G[C>A]A 0.016338672  4.9016016
#> 242  C>A G[C>A]C 0.017571383  5.2714149
#> 243  C>A G[C>A]G 0.006419293  1.9257880
#> 244  C>A G[C>A]T 0.012631887  3.7895661
#> 245  C>G G[C>G]A 0.007569400  2.2708199
#> 246  C>G G[C>G]C 0.006407871  1.9223614
#> 247  C>G G[C>G]G 0.002433293  0.7299880
#> 248  C>G G[C>G]T 0.008124929  2.4374787
#> 249  C>T G[C>T]A 0.013313488  3.9940464
#> 250  C>T G[C>T]C 0.012987867  3.8963602
#> 251  C>T G[C>T]G 0.004474679  1.3424036
#> 252  C>T G[C>T]T 0.011193117  3.3579350
#> 253  T>A G[T>A]A 0.006609142  1.9827425
#> 254  T>A G[T>A]C 0.004644861  1.3934582
#> 255  T>A G[T>A]G 0.009645684  2.8937051
#> 256  T>A G[T>A]T 0.007112597  2.1337790
#> 257  T>C G[T>C]A 0.008933222  2.6799666
#> 258  T>C G[T>C]C 0.004898068  1.4694205
#> 259  T>C G[T>C]G 0.009459062  2.8377186
#> 260  T>C G[T>C]T 0.008329559  2.4988678
#> 261  T>G G[T>G]A 0.002496523  0.7489569
#> 262  T>G G[T>G]C 0.001428218  0.4284653
#> 263  T>G G[T>G]G 0.006235766  1.8707298
#> 264  T>G G[T>G]T 0.002849820  0.8549461
#> 265  C>A T[C>A]A 0.018798045  5.6394135
#> 266  C>A T[C>A]C 0.024386104  7.3158311
#> 267  C>A T[C>A]G 0.004973720  1.4921161
#> 268  C>A T[C>A]T 0.021536377  6.4609132
#> 269  C>G T[C>G]A 0.008274661  2.4823983
#> 270  C>G T[C>G]C 0.011325426  3.3976278
#> 271  C>G T[C>G]G 0.001381567  0.4144701
#> 272  C>G T[C>G]T 0.013254230  3.9762690
#> 273  C>T T[C>T]A 0.009982055  2.9946166
#> 274  C>T T[C>T]C 0.014477519  4.3432556
#> 275  C>T T[C>T]G 0.005838103  1.7514308
#> 276  C>T T[C>T]T 0.012243639  3.6730918
#> 277  T>A T[T>A]A 0.007142323  2.1426968
#> 278  T>A T[T>A]C 0.006436888  1.9310663
#> 279  T>A T[T>A]G 0.007066453  2.1199359
#> 280  T>A T[T>A]T 0.009156982  2.7470946
#> 281  T>C T[T>C]A 0.011716363  3.5149088
#> 282  T>C T[T>C]C 0.006519940  1.9559820
#> 283  T>C T[T>C]G 0.007173081  2.1519242
#> 284  T>C T[T>C]T 0.011048486  3.3145458
#> 285  T>G T[T>G]A 0.004227468  1.2682404
#> 286  T>G T[T>G]C 0.004418606  1.3255818
#> 287  T>G T[T>G]G 0.005811939  1.7435818
#> 288  T>G T[T>G]T 0.008216595  2.4649784

Reconstruct a mutation catalogue from a signature model

We often need to reconstruct a catalogue (or tally) from our signature model.

# Load a signature collection
signatures <- sig_load("COSMIC_v3.3.1_SBS_GRCh38")

# Create a model that represents a mix of SBS2 (40%) and SBS13 (60%)
model <- c(SBS2 = 0.4, SBS13 = 0.6)

# Create a new signature by combining SBS2 and SBS13 in ratios dictated by the above model
signature <- sig_combine(signatures, model)

# Reconstruct a perfect catalogue describing what the mutational profile of a sample
# with 200 mutations and the given signature model would look like
reconstuction <- sig_reconstruct(signature, n=200)

# Visualise result
sig_visualise(
  reconstuction, 
  class = "catalogue",
  title = "Reconstructed Catalogue",
  subtitle = "Expected profile of a sample with 200 mutations: 40% from SBS2, 60% from SBS13"
  )
#> ✔ All channels matched perfectly to set [sbs_96]. Using this set for sort order
#> ✔ All types matched perfectly to set [sbs_type]. Using this set for sort order
#> ✔ Types matched perfectly to palette [snv_type]

Compute Stats on Signature Collections


# Load a signature collection
signatures <- sig_load("COSMIC_v3.3.1_SBS_GRCh38")

# Compute common statistics:
# e.g. Gini coefficient, Exponentiated Shannon Index, KL divergence, L1/L2/L3 Norms)
stats <- sig_collection_stats(signatures)

# Print Stats
head(stats)
#>     id      gini shannon_index shannon_index_exp kl_divergence_from_uniform
#> 1 SBS1 0.9480089      1.856082          6.398621                  2.7082657
#> 2 SBS2 0.9798792      1.218777          3.383048                  3.3455711
#> 3 SBS3 0.3268209      4.385754         80.298771                  0.1785939
#> 4 SBS4 0.6456680      3.809528         45.129134                  0.7548202
#> 5 SBS5 0.4063016      4.296474         73.440415                  0.2678738
#> 6 SBS6 0.8851745      2.718273         15.154126                  1.8460754
#>      l3_norm   l2_norm l1_norm l0_norm max_channel_fraction
#> 1 0.41142780 0.4844887       1      96           0.37062390
#> 2 0.56614330 0.6231289       1      96           0.53513019
#> 3 0.06042455 0.1174484       1      96           0.02499156
#> 4 0.12333330 0.1852696       1      96           0.08026888
#> 5 0.07475926 0.1306525       1      96           0.04597922
#> 6 0.23115412 0.3114406       1      96           0.17879063
#>   shannon_index_exp_scaled l3_norm_scaled l2_norm_scaled l1_norm_scaled
#> 1               0.06665230   0.0042857062    0.005046757     0.01041667
#> 2               0.03524008   0.0058973261    0.006490926     0.01041667
#> 3               0.83644553   0.0006294224    0.001223421     0.01041667
#> 4               0.47009514   0.0012847218    0.001929892     0.01041667
#> 5               0.76500432   0.0007787423    0.001360964     0.01041667
#> 6               0.15785548   0.0024078554    0.003244173     0.01041667
#>   l0_norm_scaled
#> 1              1
#> 2              1
#> 3              1
#> 4              1
#> 5              1
#> 6              1

# Plot kl_divergence_from_uniform against exponentiated shannon index
plot(
  x = stats[["kl_divergence_from_uniform"]], 
  y = stats[["shannon_index_exp_scaled"]]
)

### Compute Similarity/Distance Measures

signatures <- sig_load("COSMIC_v3.3.1_SBS_GRCh38")

# Pairwise cosine similarity
sim_df <- sig_collection_pairwise_stats(
  signatures,
  metric = "cosine_similarity",
  format = "data.frame"
)
head(sim_df)
#>     S1    S2 cosine_similarity
#> 1 SBS1  SBS2        0.02031489
#> 2 SBS1  SBS3        0.05372626
#> 3 SBS1  SBS4        0.02361321
#> 4 SBS1  SBS5        0.19426687
#> 5 SBS1  SBS6        0.77021158
#> 6 SBS1 SBS7a        0.04804510

# L2 (Euclidean) distance matrix
dist_mat <- sig_collection_pairwise_stats(
  signatures,
  metric = "L2",
  format = "matrix"
)

Model Correctness

Quantify how well a fitted model matches ground truth signature weights.


signatures <- sig_load("COSMIC_v3.3.1_SBS_GRCh38")
cosmic_signatures <- names(signatures)

observed <- c(SBS1 = 0.7, SBS5 = 0.3, SBS18 = 0)
truth <- c(SBS1 = 0.6, SBS5 = 0.4, SBS18 = 0)

metrics <- sig_model_correctness(observed, truth, all_signatures = cosmic_signatures)

print(metrics)
#> $fitting_error
#> [1] 0.1
#> 
#> $RMSE
#> [1] 0.01591115
#> 
#> $n_false_positives
#> [1] 0
#> 
#> $n_true_positives
#> [1] 2
#> 
#> $n_false_negatives
#> [1] 0
#> 
#> $n_true_negatives
#> [1] 77
#> 
#> $total_false_positive_contributions
#> [1] 0
#> 
#> $precision
#> [1] 1
#> 
#> $recall
#> [1] 1
#> 
#> $specificity
#> [1] 1
#> 
#> $mathews_correlation_coeff
#> [1] 1
#> 
#> $f1
#> [1] 1
#> 
#> $balanced_accuracy
#> [1] 1