Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@ MSToolkit/extern/expat-2.2.9/
# user-specific Claude Code settings
.claude/settings.local.json

# ignore dynamically generated files
MSToolkit/include/expat.h
MSToolkit/include/expat_external.h
MSToolkit/include/zconf.h
MSToolkit/include/zlib.h
MSToolkit/*.mri

.DS_Store
.idea
Expand Down Expand Up @@ -157,4 +151,4 @@ ipch/
*.msp

# Artifact of CodeQL
_codeql_detected_source_root
_codeql_detected_source_root
19 changes: 19 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,22 @@ Rules for Claude Code:

A `.gitattributes` file at the repo root enforces CRLF for all tracked source files
at the git level, providing a second safety net.


## Development Workflows

### Code Review Protocol (Copilot Mode)
When requested to perform a code review, always execute the following multi-step workflow before writing your feedback:
1. **Tooling Check:** Run the project's test commands to gather concrete diagnostic data.
2. **Analysis:** Review the uncommitted files, staged changes, or the specified branch diff.
3. **Report Generation:** Structure the review using the exact template below.

### Code Review Template
Provide feedback using this exact format:
1. **Summary:** A 1-2 sentence overview of the changes.
2. **Critical Issues:** Bugs, security vulnerabilities, or breaking changes. Provide the file path, exact line numbers, and the core issue.
3. **Code Quality & Maintainability:** Poor practices, anti-patterns, or missing tests.
4. **Actionable Improvements:** Specific refactoring suggestions accompanied by concise code snippets.

*Constraint:* Keep critiques technical, objective, and ranked by severity. Avoid generic praise.

2 changes: 1 addition & 1 deletion Comet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ void LoadParameters(char* pszParamsFile,
enzymeInformation.szSampleEnzymeBreakAA,
enzymeInformation.szSampleEnzymeNoBreakAA);
}
fgets(szParamBuf, SIZE_BUF, fp);
(void)fgets(szParamBuf, SIZE_BUF, fp);
}
fclose(fp);

Expand Down
1,540 changes: 6 additions & 1,534 deletions CometSearch/CometDataInternal.h

Large diffs are not rendered by default.

35 changes: 6 additions & 29 deletions CometSearch/CometFragmentIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ void CometFragmentIndex::GenerateFragmentIndex(ThreadPool *tp)


void CometFragmentIndex::AddFragmentsThreadProc(bool bCountOnly,
ThreadPool *tp)
ThreadPool* /*tp*/)
{
size_t iWhichFragmentPeptide = 0; // unused here for counting only

Expand Down Expand Up @@ -602,7 +602,8 @@ bool CometFragmentIndex::GeneratePlainPeptideIndex(ThreadPool* tp, vector<pair<s
g_staticParams.options.bFastPlainPeptideIdx = true;
g_staticParams.iDbType = DbType::FASTA_DB;

bool bSucceeded = CometSearch::RunSearch(0, 0, tp);
vector<Query*> emptyQueries;
bool bSucceeded = CometSearch::RunSearch(0, 0, tp, emptyQueries);

g_staticParams.options.bCreateFragmentIndex = false;
g_staticParams.options.bFastPlainPeptideIdx = false;
Expand Down Expand Up @@ -1102,64 +1103,40 @@ bool CometFragmentIndex::WriteFIPlainPeptideIndex(ThreadPool *tp)
// Destruction is O(n) for pcVarModSites in g_pvDBIndex but trivial for
// g_vRawPeptides; order no longer matters.
{
auto tClear = chrono::steady_clock::now();
vector<PlainPeptideIndexStruct>().swap(g_vRawPeptides);
// printf(" - freed g_vRawPeptides: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
g_pvDBIndex.clear(); // DBIndex::sPeptide strings freed after g_vRawPeptides
// to keep the allocator bins warm for the string frees above
// printf(" - freed g_pvDBIndex: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
g_pvProteinsList.clear(); // CSR flat layout: 2 free() calls instead of ~190M
// printf(" - freed g_pvProteinsList: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
g_pvProteinNames.clear();
// printf(" - freed g_pvProteinNames: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
vector<string>().swap(MOD_SEQS);
// printf(" - freed MOD_SEQS: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
vector<FragmentPeptidesStruct>().swap(g_vFragmentPeptides);
// printf(" - freed g_vFragmentPeptides: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
delete[] PEPTIDE_MOD_SEQ_IDXS;
PEPTIDE_MOD_SEQ_IDXS = nullptr;
// printf(" - freed PEPTIDE_MOD_SEQ_IDXS: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

{
auto tClear = chrono::steady_clock::now();
delete[] MOD_SEQ_MOD_NUM_START;
MOD_SEQ_MOD_NUM_START = nullptr;
delete[] MOD_SEQ_MOD_NUM_CNT;
MOD_SEQ_MOD_NUM_CNT = nullptr;
// printf(" - freed MOD_SEQ_MOD_NUM_START/CNT: %4lld ms\n",
// (long long)chrono::duration_cast<chrono::milliseconds>(chrono::steady_clock::now() - tClear).count());
}

fflush(stdout);
Expand Down Expand Up @@ -1412,7 +1389,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
{
size_t pepSectionSize = (size_t)(clProteinsFilePos - clPeptidesFilePos) - sizeof(size_t);
vector<char> pepBuf(pepSectionSize);
fread(pepBuf.data(), 1, pepSectionSize, fp);
(void)fread(pepBuf.data(), 1, pepSectionSize, fp);
const char* p = pepBuf.data();

struct PlainPeptideIndexStruct sTmp;
Expand All @@ -1437,7 +1414,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
{
size_t protSectionSize = (size_t)(clPermutationsFilePos - clProteinsFilePos);
vector<char> protBuf(protSectionSize);
fread(protBuf.data(), 1, protSectionSize, fp);
(void)fread(protBuf.data(), 1, protSectionSize, fp);
const char* p = protBuf.data();

size_t tSize;
Expand Down Expand Up @@ -1505,7 +1482,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
comet_fileoffset_t varDataStart = comet_ftell(fp);
size_t varDataSize = (size_t)(clFooterPos - varDataStart);
vector<char> varBuf(varDataSize);
fread(varBuf.data(), 1, varDataSize, fp);
(void)fread(varBuf.data(), 1, varDataSize, fp);
const char* p = varBuf.data();

int iTmp;
Expand Down
2 changes: 1 addition & 1 deletion CometSearch/CometInterfaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ namespace CometInterfaces
ICometSearchManager *GetCometSearchManager();
void ReleaseCometSearchManager();

static ThreadPool* _tp;
[[maybe_unused]] static ThreadPool* _tp;
}

#endif // _COMETINTERFACES_H_
11 changes: 6 additions & 5 deletions CometSearch/CometMassSpecUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,8 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb,
bool bReturnFullProteinString, // 0 = return accession only, 1 = return full description line
unsigned int *uiNumTotProteins, // matched protein count
vector<string>& vProteinTargets, // the target protein names
vector<string>& vProteinDecoys) // the decoy protein names if applicable
vector<string>& vProteinDecoys, // the decoy protein names if applicable
const vector<Query*>& queries)
{
char szProteinName[WIDTH_REFERENCE];

Expand All @@ -209,9 +210,9 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb,
Results* pOutput;

if (iPrintTargetDecoy != 2)
pOutput = g_pvQuery.at(iWhichQuery)->_pResults;
pOutput = queries.at(iWhichQuery)->_pResults;
else
pOutput = g_pvQuery.at(iWhichQuery)->_pDecoys;
pOutput = queries.at(iWhichQuery)->_pDecoys;

int iPrintDuplicateProteinCt = 0; // track # proteins, exit when at iMaxDuplicateProteins

Expand Down Expand Up @@ -284,9 +285,9 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb,
Results* pOutput;

if (iPrintTargetDecoy != 2)
pOutput = g_pvQuery.at(iWhichQuery)->_pResults;
pOutput = queries.at(iWhichQuery)->_pResults;
else
pOutput = g_pvQuery.at(iWhichQuery)->_pDecoys;
pOutput = queries.at(iWhichQuery)->_pDecoys;

int iPrintDuplicateProteinCt = 0; // track # proteins, exit when at iMaxDuplicateProteins

Expand Down
3 changes: 2 additions & 1 deletion CometSearch/CometMassSpecUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ class CometMassSpecUtils
bool bReturnFullProteinString, // 0 = return accession only, 1 = return full description line
unsigned int *iNumTotProteins, // matched protein count
vector<string>& vProteinTargets, // the target protein names
vector<string>& vProteinDecoys); // the decoy protein names if applicable
vector<string>& vProteinDecoys, // the decoy protein names if applicable
const vector<Query*>& queries);

static string GetField(std::string *s,
unsigned int n,
Expand Down
22 changes: 11 additions & 11 deletions CometSearch/CometPeptideIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,24 @@ bool CometPeptideIndex::ReadPeptideIndex(void)

comet_fileoffset_t lEndOfPeptides;
comet_fileoffset_t clProteinsFilePos;
size_t tTmpRead;
tTmpRead = fread(&lEndOfPeptides, clSizeCometFileOffset, 1, fp);
tTmpRead = fread(&clProteinsFilePos, clSizeCometFileOffset, 1, fp);
(void)fread(&lEndOfPeptides, clSizeCometFileOffset, 1, fp);
(void)fread(&clProteinsFilePos, clSizeCometFileOffset, 1, fp);

// --- Read the mass index and peptide count from lEndOfPeptides position ---
comet_fseek(fp, lEndOfPeptides, SEEK_SET);

int iMinMass, iMaxMass;
uint64_t tNumPeptides;
tTmpRead = fread(&iMinMass, sizeof(int), 1, fp);
tTmpRead = fread(&iMaxMass, sizeof(int), 1, fp);
tTmpRead = fread(&tNumPeptides, sizeof(uint64_t), 1, fp);
(void)fread(&iMinMass, sizeof(int), 1, fp);
(void)fread(&iMaxMass, sizeof(int), 1, fp);
(void)fread(&tNumPeptides, sizeof(uint64_t), 1, fp);

int iMaxPeptideMass10 = iMaxMass * 10;

// Read the mass index array: lIndex[0..iMaxPeptideMass10-1]
// Each entry is a file offset to the first peptide at that 0.1 Da mass bin
comet_fileoffset_t* lIndex = new comet_fileoffset_t[iMaxPeptideMass10];
tTmpRead = fread(lIndex, clSizeCometFileOffset, iMaxPeptideMass10, fp);
(void)fread(lIndex, clSizeCometFileOffset, iMaxPeptideMass10, fp);

// --- Read protein names ---
// Protein names are stored between end-of-header and clProteinsFilePos
Expand All @@ -125,19 +124,19 @@ bool CometPeptideIndex::ReadPeptideIndex(void)
comet_fseek(fp, clProteinsFilePos, SEEK_SET);

size_t tNumProteinEntries;
tTmpRead = fread(&tNumProteinEntries, clSizeCometFileOffset, 1, fp);
(void)fread(&tNumProteinEntries, clSizeCometFileOffset, 1, fp);

g_pvProteinsList.clear();
g_pvProteinsList.reserve(tNumProteinEntries);

for (size_t i = 0; i < tNumProteinEntries; ++i)
{
size_t tNumProteins;
tTmpRead = fread(&tNumProteins, clSizeCometFileOffset, 1, fp);
(void)fread(&tNumProteins, clSizeCometFileOffset, 1, fp);

vector<comet_fileoffset_t> vTmp(tNumProteins);
for (size_t j = 0; j < tNumProteins; ++j)
tTmpRead = fread(&vTmp[j], clSizeCometFileOffset, 1, fp);
(void)fread(&vTmp[j], clSizeCometFileOffset, 1, fp);
g_pvProteinsList.push_back(std::move(vTmp));
}

Expand Down Expand Up @@ -235,7 +234,8 @@ bool CometPeptideIndex::WritePeptideIndex(ThreadPool* tp)

if (bSucceeded)
{
bSucceeded = CometSearch::RunSearch(0, 0, tp);
vector<Query*> emptyQueries;
bSucceeded = CometSearch::RunSearch(0, 0, tp, emptyQueries);
}

if (!bSucceeded)
Expand Down
10 changes: 5 additions & 5 deletions CometSearch/CometPostAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,18 +176,18 @@ CometPostAnalysis::~CometPostAnalysis()
}


bool CometPostAnalysis::PostAnalysis(ThreadPool* tp)
bool CometPostAnalysis::PostAnalysis(ThreadPool* tp, const vector<Query*>& queries)
{
bool bSucceeded = true;

//Reuse existing ThreadPool
ThreadPool *pPostAnalysisThreadPool = tp;

for (int i=0; i<(int)g_pvQuery.size(); ++i)
for (int i=0; i<(int)queries.size(); ++i)
{
if (g_pvQuery.at(i)->iMatchPeptideCount > 0 || g_pvQuery.at(i)->iDecoyMatchPeptideCount > 0)
if (queries.at(i)->iMatchPeptideCount > 0 || queries.at(i)->iDecoyMatchPeptideCount > 0)
{
PostAnalysisThreadData* pThreadData = new PostAnalysisThreadData(i);
PostAnalysisThreadData* pThreadData = new PostAnalysisThreadData(i, &queries);

pPostAnalysisThreadPool->doJob(std::bind(PostAnalysisThreadProc, pThreadData, pPostAnalysisThreadPool));

Expand Down Expand Up @@ -223,7 +223,7 @@ void CometPostAnalysis::PostAnalysisThreadProc(PostAnalysisThreadData *pThreadDa
(void)tp; // suppress unused parameter warning

int iQueryIndex = pThreadData->iQueryIndex;
Query* pQuery = g_pvQuery.at(iQueryIndex);
Query* pQuery = pThreadData->pQueries->at(iQueryIndex);

AnalyzeSP(pQuery);

Expand Down
7 changes: 5 additions & 2 deletions CometSearch/CometPostAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@
struct PostAnalysisThreadData
{
int iQueryIndex;
const vector<Query*>* pQueries;

PostAnalysisThreadData()
{
iQueryIndex = -1;
pQueries = nullptr;
}

PostAnalysisThreadData(int iQueryIndex_in)
PostAnalysisThreadData(int iQueryIndex_in, const vector<Query*>* pQueries_in)
{
iQueryIndex = iQueryIndex_in;
pQueries = pQueries_in;
}
};

Expand All @@ -41,7 +44,7 @@ class CometPostAnalysis
public:
CometPostAnalysis();
~CometPostAnalysis();
static bool PostAnalysis(ThreadPool* tp);
static bool PostAnalysis(ThreadPool* tp, const vector<Query*>& queries);
static void PostAnalysisThreadProc(PostAnalysisThreadData* pThreadData,
ThreadPool* tp);
// Query*-based overloads, the only versions now
Expand Down
Loading
Loading