


/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/


// ./tests/peptidoms/catch2-only-peptidoms [fine4291] -s


//    msconvert
//    /gorgone/pappso/formation/Janvier2014/TD/mzXML/20120906_balliau_extract_1_A01_urnb-1.mzXML
//    --filter "index 4291" --mgf

#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <catch2/matchers/catch_matchers_vector.hpp>

#include <QString>
#include <QFile>
#include <iostream>
#include "../common.h"
#include "tests/tests-config.h"
#include <pappsomspp/core/amino_acid/aastringcodemassmatching.h>
#include <pappsomspp/core/peptide/peptideproformaparser.h>
#include <pappsomspp/core/fasta/fastareader.h>
#include <pappsomspp/core/fasta/fastafileindexer.h>
#include <pappsomspp/core/massspectrum/massspectrum.h>
#include <pappsomspp/core/processing/filters/filterresample.h>
#include <pappsomspp/core/processing/filters/filterpass.h>
#include <pappsomspp/core/processing/filters/filterchargedeconvolution.h>
#include <pappsomspp/core/processing/specpeptidoms/correctiontree.h>
#include <pappsomspp/core/processing/specpeptidoms/locationsaver.h>
#include <pappsomspp/core/processing/specpeptidoms/semiglobalalignment.h>
#include <pappsomspp/core/processing/specpeptidoms/spomsspectrum.h>
#include <pappsomspp/core/processing/specpeptidoms/types.h>
#include <pappsomspp/core/protein/protein.h>
#include <pappsomspp/core/msfile/msfileaccessor.h>
#include <pappsomspp/core/msrun/output/mgfoutput.h>


/**
 * @brief Checks the specpeptidoms alignment steps on one test spectrum
 * (scan 4291) against the protein GRMZM2G083841_P01.
 *
 * Steps exercised, in order:
 *  - read the MGF test spectrum and build a SpOMSSpectrum from it;
 *  - check the amino acid positions reported for 'M', 'Y' and 'R';
 *  - run fastAlign() and check the single saved location;
 *  - run preciseAlign() on that location and check the best peptide string;
 *  - check getPotentialMassErrors(), then the outcome of
 *    postProcessingAlign().
 *
 * The block comments containing "[Thread-0] INFO main.Main" lines are log
 * excerpts kept as reference values (presumably produced by another
 * implementation of the algorithm, decimal commas as logged -- TODO confirm).
 */
TEST_CASE("test for peptidoms alignment.", "[fine4291]")
{
  // Set the debugging message formatting pattern.
  qSetMessagePattern(QString("%{file}@%{line}, %{function}(): %{message}"));


  SECTION("..:: Check precise alignment for GRMZM2G083841_P01 on spectrum 4291 ::..", "[fine4291]")
  {


    // Test spectrum shipped with the sources (see the msconvert command at the
    // top of this file for the scan it was extracted from).
    pappso::QualifiedMassSpectrum spectrum_simple = readQualifiedMassSpectrumMgf(
      QString(CMAKE_SOURCE_DIR)
        .append("/tests/data/scans/20120906_balliau_extract_1_A01_urnb-1_4291_deisotope_60.mgf"));


    REQUIRE(spectrum_simple.size() == 60);

    // Amino acid code table, with modification MOD:00397 registered on 'C'.
    pappso::AaCode aa_code;
    aa_code.addAaModification('C', pappso::AaModification::getInstance("MOD:00397"));


    pappso::specpeptidoms::SpOMSProtein protein(
      "GRMZM2G083841_P01",
      "MATAPSDVLAVELLQRECGVRQPLPVVPLFERLADLQSAPASVERLFSVDWYMDRIKGKQQVMVGYSDSGKDAGRLSAAWQLYRAQEEMAQV"
      "AKRYGVKLTLFH",
      aa_code);

    // 0.02 Dalton precision, shared by the spectrum and the aligner below.
    pappso::PrecisionPtr precision_ptr = pappso::PrecisionFactory::getDaltonInstance(0.02);
    /*
        Disabled helper: writes spectrum_simple to an MGF file (the file name
        matches the test data file read above).

        QFile mgf_out("20120906_balliau_extract_1_A01_urnb-1_4291_deisotope_60.mgf");
        mgf_out.open(QFile::WriteOnly);

        pappso::MgfOutput mgf_writer(&mgf_out);

        mgf_writer.write(spectrum_simple);
        */


    pappso::specpeptidoms::SpOMSSpectrum experimental_spectrum(
      spectrum_simple, precision_ptr, aa_code);


    // No position is expected for 'M' in this spectrum.
    auto spectrum_M_pos = experimental_spectrum.getAaPositions(aa_code.getAaCode('M'));
    REQUIRE(spectrum_M_pos.size() == 0);

    auto spectrum_Y_pos = experimental_spectrum.getAaPositions(aa_code.getAaCode('Y'));

    // Every reported 'Y' position must link two peaks whose m/z difference is
    // the mass of 'Y' (Catch::Approx epsilon is a relative tolerance).
    for(const auto &position_peak : spectrum_Y_pos)
      {
        std::cout << "Y peak l:" << static_cast<int>(position_peak.l_peak) << " mz:"
                  << static_cast<double>(experimental_spectrum.at(position_peak.l_peak).peak_mz)
                  << std::endl;
        std::cout << " peak r:" << static_cast<int>(position_peak.r_peak) << " mz:"
                  << static_cast<double>(experimental_spectrum.at(position_peak.r_peak).peak_mz)
                  << std::endl;

        REQUIRE(experimental_spectrum.at(position_peak.r_peak).peak_mz -
                  experimental_spectrum.at(position_peak.l_peak).peak_mz ==
                Catch::Approx(aa_code.getMass('Y')).epsilon(0.001));
      }

    // The checks below read entries 0..7: fail with a readable assertion
    // rather than a std::out_of_range exception if fewer positions are found.
    REQUIRE(spectrum_Y_pos.size() >= 8);

    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(5).r_peak).peak_mz -
              experimental_spectrum.at(spectrum_Y_pos.at(5).l_peak).peak_mz ==
            Catch::Approx(aa_code.getMass('Y')).epsilon(0.001));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(6).r_peak).peak_mz -
              experimental_spectrum.at(spectrum_Y_pos.at(6).l_peak).peak_mz ==
            Catch::Approx(aa_code.getMass('Y')).epsilon(0.001));
    /*
    * Reference log for the 'Y' positions, as (left m/z, right m/z) pairs:
    * [Thread-0] INFO main.Main - Y: 18 8
[Thread-0] INFO main.Main - (175,12 ,338,18)*
[Thread-0] INFO main.Main - (257,12 ,420,19)*
[Thread-0] INFO main.Main - (493,22 ,656,29)*
[Thread-0] INFO main.Main - (496,26 ,659,32)*
[Thread-0] INFO main.Main - (656,29 ,819,34)*
[Thread-0] INFO main.Main - (659,32 ,822,38)*
[Thread-0] INFO main.Main - (895,41 ,1058,48)
[Thread-0] INFO main.Main - (977,42 ,1140,48)
*/
    // One pair of checks (left peak, right peak) per reference line above.
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(0).l_peak).peak_mz ==
            Catch::Approx(175.12).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(0).r_peak).peak_mz ==
            Catch::Approx(338.18).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(1).l_peak).peak_mz ==
            Catch::Approx(257.12).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(1).r_peak).peak_mz ==
            Catch::Approx(420.19).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(2).l_peak).peak_mz ==
            Catch::Approx(493.22).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(2).r_peak).peak_mz ==
            Catch::Approx(656.29).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(3).l_peak).peak_mz ==
            Catch::Approx(496.26).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(3).r_peak).peak_mz ==
            Catch::Approx(659.32).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(4).l_peak).peak_mz ==
            Catch::Approx(656.29).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(4).r_peak).peak_mz ==
            Catch::Approx(819.34).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(5).l_peak).peak_mz ==
            Catch::Approx(659.32).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(5).r_peak).peak_mz ==
            Catch::Approx(822.38).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(6).l_peak).peak_mz ==
            Catch::Approx(895.41).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(6).r_peak).peak_mz ==
            Catch::Approx(1058.48).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(7).l_peak).peak_mz ==
            Catch::Approx(977.42).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_Y_pos.at(7).r_peak).peak_mz ==
            Catch::Approx(1140.48).epsilon(0.01));

    auto spectrum_R_pos = experimental_spectrum.getAaPositions(aa_code.getAaCode('R'));

    // Same consistency check as for 'Y', this time with the mass of 'R'.
    for(const auto &position_peak : spectrum_R_pos)
      {
        std::cout << "R peak l:" << static_cast<int>(position_peak.l_peak) << " mz:"
                  << static_cast<double>(experimental_spectrum.at(position_peak.l_peak).peak_mz)
                  << std::endl;
        std::cout << " peak r:" << static_cast<int>(position_peak.r_peak) << " mz:"
                  << static_cast<double>(experimental_spectrum.at(position_peak.r_peak).peak_mz)
                  << std::endl;

        REQUIRE(experimental_spectrum.at(position_peak.r_peak).peak_mz -
                  experimental_spectrum.at(position_peak.l_peak).peak_mz ==
                Catch::Approx(aa_code.getMass('R')).epsilon(0.001));
      }
    REQUIRE(spectrum_R_pos.size() == 8);
    /*
     * Reference log for the 'R' positions, as (left m/z, right m/z) pairs:
     * [Thread-0] INFO main.Main - R: 17 8
[Thread-0] INFO main.Main - (19,02 ,175,12)*
[Thread-0] INFO main.Main - (83,02 ,239,11)
[Thread-0] INFO main.Main - (485,22 ,641,30)*
[Thread-0] INFO main.Main - (503,23 ,659,32)*
[Thread-0] INFO main.Main - (656,29 ,812,37)*
[Thread-0] INFO main.Main - (674,30 ,830,39)
[Thread-0] INFO main.Main - (1076,49 ,1232,58)*
[Thread-0] INFO main.Main - (1140,48 ,1296,59)*

*/
    // Only the first three reference pairs are checked for 'R'.
    REQUIRE(spectrum_R_pos.at(0).r_peak == 2);
    REQUIRE(experimental_spectrum.at(spectrum_R_pos.at(0).l_peak).peak_mz ==
            Catch::Approx(19.02).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_R_pos.at(0).r_peak).peak_mz ==
            Catch::Approx(175.12).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_R_pos.at(1).l_peak).peak_mz ==
            Catch::Approx(83.02).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_R_pos.at(1).r_peak).peak_mz ==
            Catch::Approx(239.11).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_R_pos.at(2).l_peak).peak_mz ==
            Catch::Approx(485.22).epsilon(0.01));
    REQUIRE(experimental_spectrum.at(spectrum_R_pos.at(2).r_peak).peak_mz ==
            Catch::Approx(641.30).epsilon(0.01));

    // Default-constructed score values are used for the alignment.
    pappso::specpeptidoms::ScoreValues score_values;

    pappso::specpeptidoms::SemiGlobalAlignment semi_global_alignment(
      score_values, precision_ptr, aa_code);

    // Empty debug statements: trace markers around the first alignment.
    qDebug();
    semi_global_alignment.fastAlign(experimental_spectrum, &protein); // first alignment

    qDebug();
    // Retrieve the interesting sub-sequences for a more precise alignment.
    std::vector<pappso::specpeptidoms::Location> locations =
      semi_global_alignment.getLocationSaver().getLocations();

    REQUIRE(locations.size() == 1);
    /*
     *[Thread-0] INFO main.Main - location 0 [0 ,0], score : 30 ,protein -1, spectrum -1 ,tree 0
[Thread-0] INFO main.Main - location 1 [0 ,0], score : 30 ,protein -1, spectrum -1 ,tree 0
[Thread-0] INFO main.Main - location 2 [0 ,0], score : 30 ,protein -1, spectrum -1 ,tree 0
[Thread-0] INFO main.Main - location 3 [26 ,54], score : 42 ,protein 0, spectrum 0 ,tree 69
[Thread-0] INFO main.Main - location 4 [0 ,0], score : 30 ,protein -1, spectrum -1 ,tree 0
*/

    // Safe: exactly one location was required just above.
    pappso::specpeptidoms::Location loc = locations.front();

    // NOTE(review): these values differ from the reference log excerpt above
    // (location 3: [26 ,54], score 42, tree 69) -- confirm the log is kept for
    // information only.
    REQUIRE(loc.beginning == 29);
    REQUIRE(loc.length == 27);
    REQUIRE(loc.score == 31);
    REQUIRE(loc.tree == 73);


    // Second alignment, restricted to the location found by fastAlign().
    semi_global_alignment.preciseAlign(experimental_spectrum,
                                       &protein,
                                       loc.beginning,
                                       loc.length);
    // Retrieve the best alignment.
    pappso::specpeptidoms::Alignment best_alignment = semi_global_alignment.getBestAlignment();

    // Reference result line:
    // 20120906_balliau_extract_1_A01_urnb-1_4291.0;-1;0;VGYSDSGK;[63,70];[502,22]VGYSDS[G][K];42;8;16,00;QQVMVGYSDSGK;[59,70];[Q][Q]VMV[G][Y]SDS[G][K];42;10

    REQUIRE(best_alignment.getPeptideString(protein.getSequence()).toStdString() == "VGYSDSGK");


    std::vector<double> potential_mass_errors =
      semi_global_alignment.getPotentialMassErrors(aa_code, best_alignment, protein.getSequence());

    REQUIRE_THAT(potential_mass_errors,
                 Catch::Matchers::Approx(std::vector<double>({502.22154428780885382,
                                                              371.18105919917985602,
                                                              272.11264528465085277,
                                                              144.05406777800286022,
                                                              15.99549027135486767})));
    // Post-processing alignment on the same location, given the potential
    // mass errors computed above.
    semi_global_alignment.postProcessingAlign(
      experimental_spectrum, &protein, loc.beginning, loc.length, potential_mass_errors);
    pappso::specpeptidoms::Alignment best_post_processed_alignment =
      semi_global_alignment.getBestAlignment();

    REQUIRE(best_post_processed_alignment.m_peptideModel.toInterpretation().toStdString() ==
            "[Q][Q]VMVGYSDSGK");
    REQUIRE(best_post_processed_alignment.m_peptideModel.toProForma().toStdString() ==
            "[+15.9916]?QQVMVGYSDSGK");
  }
}
