De novo algorithm for computing molecular formulas. Using fragmentation trees, we are able to generate a resultant molecular formula. To ensure efficiency, we use a greedy heuristic to generate the resultant formula. Although this may not always result in the correct prediction, it allows us to efficiently calculate a multitude of chemical formulas.
Arguments
- mass_data
your mass_data object generated from
ms2_ms1_compare().
- parent_ppm
the ppm you wish to use when generating the candidate molecular formulas.
- number_of_threads
the number of threads you wish to use for this calculation.
References
Sebastian Böcker, Florian Rasche, Towards de novo identification of metabolites by analyzing tandem mass spectra, Bioinformatics, Volume 24, Issue 16, August 2008, Pages i49–i55, https://doi.org/10.1093/bioinformatics/btn270
Examples
data <-
import_all_data(peak_table =
mums2::mums2_example("botryllus_pt_small.csv"),
metadata =
mums2::mums2_example("boryillus_metadata.csv"),
format = "None")
matched_data <- ms2_ms1_compare(mums2_example("botryllus_v2.gnps.mgf"),
data, 0.1, 1)
#> Reading: /home/runner/work/_temp/Library/mums2/extdata/botryllus_v2.gnps.mgf ...
#> 1/349 peaks have an MS2 spectra.
compute_molecular_formulas(matched_data, number_of_threads = 2)
#> Calculating potential molecular formulas...
#> Calculating fragmentation trees...
#> 1/1 chemical formulas were predicted
#> $ms2_matches
#> mz rt ms1_compound_id spectra_index ms2_spectrum_id
#> 1 873.2658 8.85 872.25849 Da 530.21 s 1 mz873.26578rt8.85
#>
#> $peak_data
#> $peak_data[[1]]
#> $peak_data[[1]]$mz
#> [1] 221.0873 281.0498 355.0657 503.1129 551.1719 815.2109 873.2607 874.2649
#> [9] 874.3089 875.2434 875.2625 875.2862 876.2699 877.2638
#>
#> $peak_data[[1]]$intensity
#> [1] 99.057846 49.647755 99.334900 132.098430 5.310822 64.343390
#> [7] 290.572940 357.748320 24.153889 106.529755 204.054810 26.993818
#> [13] 44.469600 40.590275
#>
#>
#>
#> $ms1_data
#> compound mz rt 221012_DGM_Blank1_1_1_390
#> <char> <num> <num> <num>
#> 1: 413.12834 Da 319.32 s 414.1356 5.32 1643.672
#> 2: 655.22101 Da 481.62 s 656.2283 8.03 0.000
#> 3: 904.69182 Da 604.24 s 905.6991 10.07 0.000
#> 4: 798.49680 Da 604.00 s 799.5041 10.07 6537.559
#> 5: 641.41035 Da 429.52 s 642.4176 7.16 0.000
#> ---
#> 345: 1214.82162 Da 567.08 s 1215.8289 9.45 0.000
#> 346: 631.10947 Da 525.38 s 632.1168 8.76 105276.031
#> 347: 669.41104 Da 447.19 s 670.4183 7.45 0.000
#> 348: 1047.69861 Da 399.82 s 524.8566 6.66 0.000
#> 349: 385.17025 Da 339.63 s 368.1670 5.66 1294.992
#> 221012_DGM_Blank1_1_2_391 221012_DGM_Blank1_1_3_392
#> <num> <num>
#> 1: 1683.589 1581.715
#> 2: 0.000 0.000
#> 3: 0.000 4459.532
#> 4: 0.000 0.000
#> 5: 0.000 0.000
#> ---
#> 345: 7586.120 0.000
#> 346: 250899.562 188218.422
#> 347: 0.000 0.000
#> 348: 0.000 0.000
#> 349: 1395.290 2781.298
#> 221012_DGM_MB1588_3_1_395 221012_DGM_MB1588_3_2_396
#> <num> <num>
#> 1: 0.000 1842.961
#> 2: 0.000 0.000
#> 3: 0.000 0.000
#> 4: 0.000 0.000
#> 5: 0.000 0.000
#> ---
#> 345: 5093.665 0.000
#> 346: 216876.562 71004.234
#> 347: 0.000 0.000
#> 348: 17171.740 13720.639
#> 349: 3009.092 0.000
#> 221012_DGM_MB1588_3_3_397 221012_DGM_MB1589_4_1_398
#> <num> <num>
#> 1: 0.000 1540.177
#> 2: 8839.863 0.000
#> 3: 4535.730 0.000
#> 4: 0.000 8426.455
#> 5: 0.000 0.000
#> ---
#> 345: 4170.296 2996.647
#> 346: 0.000 117038.008
#> 347: 0.000 0.000
#> 348: 0.000 20115.963
#> 349: 0.000 2736.941
#> 221012_DGM_MB1589_4_2_399 221012_DGM_MB1589_4_3_400
#> <num> <num>
#> 1: 1379.541 1349.259
#> 2: 0.000 8058.202
#> 3: 0.000 4271.522
#> 4: 11090.130 7521.545
#> 5: 0.000 15678.532
#> ---
#> 345: 4448.148 3992.679
#> 346: 49952.098 90489.875
#> 347: 0.000 0.000
#> 348: 18680.271 19333.010
#> 349: 0.000 0.000
#> 221012_DGM_MB1590_5_1_401 221012_DGM_MB1590_5_2_402
#> <num> <num>
#> 1: 1736.740 1674.615
#> 2: 0.000 0.000
#> 3: 0.000 0.000
#> 4: 13776.298 8939.586
#> 5: 15058.773 16887.570
#> ---
#> 345: 3417.108 3036.977
#> 346: 123575.500 65562.547
#> 347: 0.000 0.000
#> 348: 20965.141 23315.670
#> 349: 4141.860 0.000
#> 221012_DGM_MB1590_5_3_403 221012_DGM_Blank2_1_1_404
#> <num> <num>
#> 1: 0.000 1661.673
#> 2: 0.000 16815.256
#> 3: 0.000 0.000
#> 4: 0.000 5920.994
#> 5: 15524.373 0.000
#> ---
#> 345: 5055.946 3621.072
#> 346: 77107.211 107155.930
#> 347: 10511.342 0.000
#> 348: 24176.549 0.000
#> 349: 4057.747 3571.253
#> 221012_DGM_Blank2_1_2_405 221012_DGM_Blank2_1_3_406
#> <num> <num>
#> 1: 2047.950 1189.160
#> 2: 17501.145 5604.232
#> 3: 0.000 0.000
#> 4: 0.000 3559.699
#> 5: 0.000 0.000
#> ---
#> 345: 0.000 1671.865
#> 346: 116357.016 74528.086
#> 347: 0.000 0.000
#> 348: 0.000 0.000
#> 349: 4414.529 4955.091
#> 221012_DGM_MB1591_6_1_407 221012_DGM_MB1591_6_2_408
#> <num> <num>
#> 1: 1573.228 0.000
#> 2: 0.000 0.000
#> 3: 2709.508 2182.018
#> 4: 5700.309 2390.115
#> 5: 0.000 0.000
#> ---
#> 345: 3505.068 1726.905
#> 346: 74032.852 48017.660
#> 347: 0.000 5067.469
#> 348: 20058.316 19373.340
#> 349: 4421.446 5214.008
#> 221012_DGM_MB1591_6_3_409 221012_DGM_MB1592_7_1_410
#> <num> <num>
#> 1: 1092.1801 1813.8533
#> 2: 6107.0801 2540.8010
#> 3: 0.0000 0.0000
#> 4: 5241.5439 0.0000
#> 5: 13128.4307 0.0000
#> ---
#> 345: 428.6547 396.7803
#> 346: 4958.4941 5093.5698
#> 347: 0.0000 0.0000
#> 348: 19019.5156 153359.2969
#> 349: 4691.7407 4503.3232
#> 221012_DGM_MB1592_7_2_411 221012_DGM_MB1592_7_3_412
#> <num> <num>
#> 1: 1505.6332 1203.859
#> 2: 2162.7383 11592.640
#> 3: 2738.0195 0.000
#> 4: 0.0000 3995.891
#> 5: 0.0000 0.000
#> ---
#> 345: 397.5974 1114.714
#> 346: 2140.3064 6304.764
#> 347: 0.0000 0.000
#> 348: 164804.3438 151955.406
#> 349: 4473.4932 5948.957
#> 221012_DGM_MB1593_8_1_413 221012_DGM_MB1593_8_2_414
#> <num> <num>
#> 1: 1327.427 1740.172
#> 2: 0.000 4208.383
#> 3: 0.000 0.000
#> 4: 5239.572 7006.156
#> 5: 16500.930 0.000
#> ---
#> 345: 0.000 1501.688
#> 346: 19399.158 23493.648
#> 347: 0.000 0.000
#> 348: 18410.760 17282.258
#> 349: 4591.124 5241.212
#> 221012_DGM_MB1593_8_3_415 221012_DGM_MB1594_9_1_416
#> <num> <num>
#> 1: 1380.644 1635.196
#> 2: 7293.763 0.000
#> 3: 0.000 3350.652
#> 4: 8872.400 3247.472
#> 5: 0.000 0.000
#> ---
#> 345: 1121.249 0.000
#> 346: 29509.277 22870.949
#> 347: 0.000 0.000
#> 348: 23607.289 18771.344
#> 349: 5819.886 5785.732
#> 221012_DGM_MB1594_9_2_417 221012_DGM_MB1594_9_3_418
#> <num> <num>
#> 1: 1337.972 1377.6218
#> 2: 0.000 7568.2002
#> 3: 0.000 0.0000
#> 4: 7382.616 0.0000
#> 5: 0.000 0.0000
#> ---
#> 345: 1567.752 215.3292
#> 346: 12036.664 6263.7222
#> 347: 0.000 0.0000
#> 348: 28021.629 28935.6602
#> 349: 0.000 4331.6118
#> 221012_DGM_Blank3_1_1_419 221012_DGM_Blank3_1_2_420
#> <num> <num>
#> 1: 1256.6434 1920.330
#> 2: 10593.5996 0.000
#> 3: 0.0000 5863.393
#> 4: 0.0000 0.000
#> 5: 0.0000 0.000
#> ---
#> 345: 245.1474 0.000
#> 346: 7260.0474 4564.664
#> 347: 0.0000 0.000
#> 348: 0.0000 0.000
#> 349: 6345.4502 7435.591
#> 221012_DGM_Blank3_1_3_421 221012_DGM_MB1595_10_1_422
#> <num> <num>
#> 1: 1438.921 1270.993
#> 2: 8887.850 3453.785
#> 3: 0.000 3040.876
#> 4: 3100.021 0.000
#> 5: 0.000 0.000
#> ---
#> 345: 0.000 0.000
#> 346: 3324.721 4895.843
#> 347: 0.000 0.000
#> 348: 0.000 39494.633
#> 349: 6385.429 7092.543
#> 221012_DGM_MB1595_10_2_423 221012_DGM_MB1595_10_3_424
#> <num> <num>
#> 1: 1232.581 1769.550
#> 2: 2805.313 3917.201
#> 3: 0.000 3773.011
#> 4: 3969.096 3393.127
#> 5: 0.000 0.000
#> ---
#> 345: 0.000 0.000
#> 346: 1643.265 1130.276
#> 347: 0.000 0.000
#> 348: 36885.684 42443.051
#> 349: 6076.128 7525.724
#> 221012_DGM_MB1597_11_1_425 221012_DGM_MB1597_11_2_426
#> <num> <num>
#> 1: 1469.4241 1916.736
#> 2: 0.0000 5492.965
#> 3: 1994.7001 0.000
#> 4: 0.0000 7475.469
#> 5: 0.0000 0.000
#> ---
#> 345: 210.3816 0.000
#> 346: 2539.5093 2856.154
#> 347: 13357.9258 11344.504
#> 348: 98078.5469 91227.000
#> 349: 7314.3442 8651.141
#> 221012_DGM_MB1597_11_3_427 221012_DGM_MB1598_12_1_428
#> <num> <num>
#> 1: 2275.196 0.000
#> 2: 3876.187 0.000
#> 3: 0.000 0.000
#> 4: 8617.719 0.000
#> 5: 0.000 0.000
#> ---
#> 345: 0.000 0.000
#> 346: 6271.732 10035.759
#> 347: 0.000 6228.568
#> 348: 127230.516 0.000
#> 349: 7965.510 0.000
#> 221012_DGM_MB1598_12_2_429 221012_DGM_MB1598_12_3_430
#> <num> <num>
#> 1: 1318.554 0.000
#> 2: 0.000 6346.356
#> 3: 0.000 5324.371
#> 4: 0.000 3165.401
#> 5: 0.000 0.000
#> ---
#> 345: 0.000 0.000
#> 346: 5767.335 5915.734
#> 347: 7533.326 9059.569
#> 348: 71575.617 0.000
#> 349: 0.000 0.000
#> 221012_DGM_MB1599_13_1_431 221012_DGM_MB1599_13_2_432
#> <num> <num>
#> 1: 1066.136 0.0000
#> 2: 6197.055 13562.3877
#> 3: 6451.301 4927.1460
#> 4: 2199.003 0.0000
#> 5: 0.000 0.0000
#> ---
#> 345: 0.000 240.6711
#> 346: 8082.012 12565.3301
#> 347: 4910.621 0.0000
#> 348: 61975.418 63947.1250
#> 349: 8023.056 0.0000
#> 221012_DGM_MB1599_13_3_433 221012_DGM_Blank4_1_1_434
#> <num> <num>
#> 1: 966.0234 1861.986
#> 2: 32407.8379 15096.834
#> 3: 5301.7993 0.000
#> 4: 2455.8958 2673.542
#> 5: 0.0000 13200.256
#> ---
#> 345: 452.7471 0.000
#> 346: 18536.6406 21437.131
#> 347: 0.0000 0.000
#> 348: 58444.6094 0.000
#> 349: 9803.4199 7831.626
#> 221012_DGM_Blank4_1_2_435 221012_DGM_Blank4_1_3_436 kmd
#> <num> <num> <num>
#> 1: 1202.606 1478.475 0.13562
#> 2: 24745.084 15284.740 0.22829
#> 3: 508.500 0.000 0.69910
#> 4: 4999.417 3814.854 0.50408
#> 5: 0.000 0.000 0.41763
#> ---
#> 345: 0.000 0.000 0.82889
#> 346: 15323.505 10285.567 0.11675
#> 347: 0.000 0.000 0.41831
#> 348: 0.000 0.000 0.85658
#> 349: 7362.686 0.000 0.16701
#>
#> $samples
#> [1] "221012_DGM_Blank1_1_1_390" "221012_DGM_Blank1_1_2_391"
#> [3] "221012_DGM_Blank1_1_3_392" "221012_DGM_MB1588_3_1_395"
#> [5] "221012_DGM_MB1588_3_2_396" "221012_DGM_MB1588_3_3_397"
#> [7] "221012_DGM_MB1589_4_1_398" "221012_DGM_MB1589_4_2_399"
#> [9] "221012_DGM_MB1589_4_3_400" "221012_DGM_MB1590_5_1_401"
#> [11] "221012_DGM_MB1590_5_2_402" "221012_DGM_MB1590_5_3_403"
#> [13] "221012_DGM_Blank2_1_1_404" "221012_DGM_Blank2_1_2_405"
#> [15] "221012_DGM_Blank2_1_3_406" "221012_DGM_MB1591_6_1_407"
#> [17] "221012_DGM_MB1591_6_2_408" "221012_DGM_MB1591_6_3_409"
#> [19] "221012_DGM_MB1592_7_1_410" "221012_DGM_MB1592_7_2_411"
#> [21] "221012_DGM_MB1592_7_3_412" "221012_DGM_MB1593_8_1_413"
#> [23] "221012_DGM_MB1593_8_2_414" "221012_DGM_MB1593_8_3_415"
#> [25] "221012_DGM_MB1594_9_1_416" "221012_DGM_MB1594_9_2_417"
#> [27] "221012_DGM_MB1594_9_3_418" "221012_DGM_Blank3_1_1_419"
#> [29] "221012_DGM_Blank3_1_2_420" "221012_DGM_Blank3_1_3_421"
#> [31] "221012_DGM_MB1595_10_1_422" "221012_DGM_MB1595_10_2_423"
#> [33] "221012_DGM_MB1595_10_3_424" "221012_DGM_MB1597_11_1_425"
#> [35] "221012_DGM_MB1597_11_2_426" "221012_DGM_MB1597_11_3_427"
#> [37] "221012_DGM_MB1598_12_1_428" "221012_DGM_MB1598_12_2_429"
#> [39] "221012_DGM_MB1598_12_3_430" "221012_DGM_MB1599_13_1_431"
#> [41] "221012_DGM_MB1599_13_2_432" "221012_DGM_MB1599_13_3_433"
#> [43] "221012_DGM_Blank4_1_1_434" "221012_DGM_Blank4_1_2_435"
#> [45] "221012_DGM_Blank4_1_3_436"
#>
#> $predicted_molecular_formulas
#> [1] "C17H57N13O17P2S3"
#>
#> attr(,"class")
#> [1] "mass_data"