From e052d77ffd20495dfbbda4b02730f93bb29f5411 Mon Sep 17 00:00:00 2001
From: Neil Kindlon <nek3d@d-128-109-67.bootp.Virginia.EDU>
Date: Mon, 11 May 2015 17:32:02 -0400
Subject: [PATCH] first check in for new coverage tool
---
Makefile | 2 +-
src/bedtools.cpp | 6 +-
src/coverageBed/Makefile | 35 ---
src/coverageBed/coverageBed.cpp | 341 --------------------------
src/coverageBed/coverageBed.h | 83 -------
src/coverageBed/coverageMain.cpp | 180 --------------
src/coverageFile/Makefile | 46 ++++
src/coverageFile/coverageBed.cpp | 341 ++++++++++++++++++++++++++
src/coverageFile/coverageBed.h | 83 +++++++
src/coverageFile/coverageFile.cpp | 203 +++++++++++++++
src/coverageFile/coverageFile.h | 54 ++++
src/coverageFile/coverageHelp.cpp | 92 +++++++
src/intersectFile/intersectFile.cpp | 1 +
src/mapFile/Makefile | 14 --
src/mapFile/mapHelp.cpp | 1 +
src/utils/Contexts/ContextBase.cpp | 4 +-
src/utils/Contexts/ContextCoverage.cpp | 77 ++++++
src/utils/Contexts/ContextCoverage.h | 40 +++
src/utils/Contexts/ContextFisher.cpp | 7 -
src/utils/Contexts/ContextIntersect.h | 2 +-
src/utils/Contexts/ContextMap.cpp | 11 -
src/utils/Contexts/ContextMerge.cpp | 8 -
src/utils/Contexts/ContextSample.cpp | 12 -
src/utils/Contexts/ContextSpacing.cpp | 11 -
src/utils/Contexts/ContextSubtract.cpp | 13 -
src/utils/Contexts/Makefile | 5 +-
src/utils/RecordOutputMgr/RecordOutputMgr.cpp | 8 +-
src/utils/aux/BedtoolsDriver.cpp | 8 +-
src/utils/aux/Makefile | 2 +-
src/utils/general/QuickString.cpp | 14 ++
src/utils/general/QuickString.h | 3 +
test/intersect/test-intersect.sh | 15 +-
32 files changed, 988 insertions(+), 734 deletions(-)
delete mode 100644 src/coverageBed/Makefile
delete mode 100644 src/coverageBed/coverageBed.cpp
delete mode 100644 src/coverageBed/coverageBed.h
delete mode 100644 src/coverageBed/coverageMain.cpp
create mode 100644 src/coverageFile/Makefile
create mode 100644 src/coverageFile/coverageBed.cpp
create mode 100644 src/coverageFile/coverageBed.h
create mode 100644 src/coverageFile/coverageFile.cpp
create mode 100644 src/coverageFile/coverageFile.h
create mode 100644 src/coverageFile/coverageHelp.cpp
create mode 100644 src/utils/Contexts/ContextCoverage.cpp
create mode 100644 src/utils/Contexts/ContextCoverage.h
diff --git a/Makefile b/Makefile
index 653f0c3..3119a93 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ SUBDIRS = $(SRC_DIR)/annotateBed \
$(SRC_DIR)/closestFile \
$(SRC_DIR)/clusterBed \
$(SRC_DIR)/complementBed \
- $(SRC_DIR)/coverageBed \
+ $(SRC_DIR)/coverageFile \
$(SRC_DIR)/expand \
$(SRC_DIR)/fastaFromBed \
$(SRC_DIR)/flankBed \
diff --git a/src/bedtools.cpp b/src/bedtools.cpp
index 68ea99c..2f09438 100644
--- a/src/bedtools.cpp
+++ b/src/bedtools.cpp
@@ -47,7 +47,7 @@ int bedpetobam_main(int argc, char* argv[]);//
void closest_help();
int cluster_main(int argc, char* argv[]); //
int complement_main(int argc, char* argv[]);//
-int coverage_main(int argc, char* argv[]); //
+void coverage_help();
int regress_test_main(int argc, char **argv); //
int expand_main(int argc, char* argv[]);//
int fastafrombed_main(int argc, char* argv[]);//
@@ -104,7 +104,6 @@ int main(int argc, char *argv[])
// genome arithmetic tools
else if (subCmd == "window") return window_main(argc-1, argv+1);
- else if (subCmd == "coverage") return coverage_main(argc-1, argv+1);
else if (subCmd == "genomecov") return genomecoverage_main(argc-1, argv+1);
else if (subCmd == "cluster") return cluster_main(argc-1, argv+1);
else if (subCmd == "complement") return complement_main(argc-1, argv+1);
@@ -306,5 +305,8 @@ void showHelp(const QuickString &subCmd) {
spacing_help();
} else if (subCmd == "fisher") {
fisher_help();
+ } else if (subCmd == "coverage") {
+ coverage_help();
}
+
}
diff --git a/src/coverageBed/Makefile b/src/coverageBed/Makefile
deleted file mode 100644
index 581564e..0000000
--- a/src/coverageBed/Makefile
+++ /dev/null
@@ -1,35 +0,0 @@
-UTILITIES_DIR = ../utils/
-OBJ_DIR = ../../obj/
-BIN_DIR = ../../bin/
-
-# -------------------
-# define our includes
-# -------------------
-INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
- -I$(UTILITIES_DIR)/gzstream/ \
- -I$(UTILITIES_DIR)/genomeFile/ \
- -I$(UTILITIES_DIR)/lineFileUtilities/ \
- -I$(UTILITIES_DIR)/fileType/ \
- -I$(UTILITIES_DIR)/BamTools/include \
- -I$(UTILITIES_DIR)/BlockedIntervals \
- -I$(UTILITIES_DIR)/version/
-# ----------------------------------
-# define our source and object files
-# ----------------------------------
-SOURCES= coverageMain.cpp coverageBed.cpp coverageBed.h
-OBJECTS= coverageMain.o coverageBed.o
-BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
-
-all: $(BUILT_OBJECTS)
-
-.PHONY: all
-
-$(BUILT_OBJECTS): $(SOURCES)
- @echo " * compiling" $(*F).cpp
- @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
-
-clean:
- @echo "Cleaning up."
- @rm -f $(OBJ_DIR)/coverageMain.o $(OBJ_DIR)/coverageBed.o
-
-.PHONY: clean
diff --git a/src/coverageBed/coverageBed.cpp b/src/coverageBed/coverageBed.cpp
deleted file mode 100644
index 42ff265..0000000
--- a/src/coverageBed/coverageBed.cpp
+++ /dev/null
@@ -1,341 +0,0 @@
-/*****************************************************************************
- coverageBed.cpp
-
- (c) 2009 - Aaron Quinlan
- Hall Laboratory
- Department of Biochemistry and Molecular Genetics
- University of Virginia
- aaronquinlan@gmail.com
-
- Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "lineFileUtilities.h"
-#include "coverageBed.h"
-
-// build
-BedCoverage::BedCoverage(string &bedAFile, string &bedBFile,
- bool sameStrand, bool diffStrand,
- bool writeHistogram, bool bamInput,
- bool obeySplits, bool eachBase,
- bool countsOnly)
-{
-
- _bedAFile = bedAFile;
- _bedBFile = bedBFile;
-
- _bedA = new BedFile(bedAFile);
- _bedB = new BedFile(bedBFile);
-
- _sameStrand = sameStrand;
- _diffStrand = diffStrand;
- _obeySplits = obeySplits;
- _eachBase = eachBase;
- _writeHistogram = writeHistogram;
- _bamInput = bamInput;
- _countsOnly = countsOnly;
-
-
- if (_bamInput == false)
- CollectCoverageBed();
- else
- CollectCoverageBam(_bedA->bedFile);
-}
-
-// destroy
-BedCoverage::~BedCoverage(void) {
- delete _bedA;
- delete _bedB;
-}
-
-
-void BedCoverage::CollectCoverageBed() {
-
- // load the "B" bed file into a map so
- // that we can easily compare "A" to it for overlaps
- _bedB->loadBedCovFileIntoMap();
-
- BED a;
- _bedA->Open();
- // process each entry in A
- while (_bedA->GetNextBed(a)) {
- if (_bedA->_status == BED_VALID) {
- // process the BED entry as a single block
- if (_obeySplits == false)
- _bedB->countHits(a, _sameStrand,
- _diffStrand, _countsOnly);
- // split the BED into discrete blocksand process
- // each independently.
- else {
- bedVector bedBlocks;
- GetBedBlocks(a, bedBlocks);
- // use countSplitHits to avoid over-counting
- // each split chunk as distinct read coverage.
- _bedB->countSplitHits(bedBlocks, _sameStrand,
- _diffStrand, _countsOnly);
- }
- }
- }
- _bedA->Close();
-
- // report the coverage (summary or histogram) for BED B.
- if (_countsOnly == true)
- ReportCounts();
- else
- ReportCoverage();
-}
-
-
-void BedCoverage::CollectCoverageBam(string bamFile) {
-
- // load the "B" bed file into a map so
- // that we can easily compare "A" to it for overlaps
- _bedB->loadBedCovFileIntoMap();
-
- // open the BAM file
- BamReader reader;
- if (!reader.Open(bamFile)) {
- cerr << "Failed to open BAM file " << bamFile << endl;
- exit(1);
- }
- // get header & reference information
- string header = reader.GetHeaderText();
- RefVector refs = reader.GetReferenceData();
-
- // convert each aligned BAM entry to BED
- // and compute coverage on B
- BamAlignment bam;
- while (reader.GetNextAlignment(bam)) {
- if (bam.IsMapped()) {
- // treat the BAM alignment as a single "block"
- if (_obeySplits == false) {
- // construct a new BED entry from the current
- // BAM alignment.
- BED a;
-
- try
- {
- a.chrom = refs.at(bam.RefID).RefName;
- a.start = bam.Position;
- a.end = bam.GetEndPosition(false, false);
- a.strand = "+";
- if (bam.IsReverseStrand()) a.strand = "-";
-
- _bedB->countHits(a, _sameStrand,
- _diffStrand, _countsOnly);
- }
- catch (out_of_range& oor)
- {
- cerr << bam.Name
- << " is said to be mapped (0x4 == false), "
- << "yet the chrom is missing. Skipping."
- << endl;
- }
- }
- // split the BAM alignment into discrete blocks and
- // look for overlaps only within each block.
- else {
- // vec to store the discrete BED "blocks" from a
- bedVector bedBlocks;
- // since we are counting coverage, we do want
- // to split blocks when a deletion (D) CIGAR op
- // is encountered (hence the true for the last parm)
- try
- {
- string chrom = refs.at(bam.RefID).RefName;
- GetBamBlocks(bam, chrom, bedBlocks, false, true);
- // use countSplitHits to avoid over-counting
- // each split chunk as distinct read coverage.
- _bedB->countSplitHits(bedBlocks, _sameStrand,
- _diffStrand, _countsOnly);
- }
- catch (out_of_range& oor)
- {
- cerr << bam.Name
- << " is said to be mapped (0x4 == false), "
- << "yet the chrom is missing. Skipping."
- << endl;
- }
- }
- }
- }
- // report the coverage (summary or histogram) for BED B.
- if (_countsOnly == true)
- ReportCounts();
- else
- ReportCoverage();
- // close the BAM file
- reader.Close();
-}
-
-
-void BedCoverage::ReportCounts() {
-
-
- // process each chromosome
- masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
- masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
- for (; chromItr != chromEnd; ++chromItr)
- {
- // for each chrom, process each bin
- binsToBedCovs::const_iterator binItr = chromItr->second.begin();
- binsToBedCovs::const_iterator binEnd = chromItr->second.end();
- for (; binItr != binEnd; ++binItr)
- {
- // for each chrom & bin, compute and report
- // the observed coverage for each feature
- vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
- vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
- for (; bedItr != bedEnd; ++bedItr)
- {
- _bedB->reportBedTab(*bedItr);
- printf("%d\n", bedItr->count);
- }
- }
- }
-}
-
-void BedCoverage::ReportCoverage() {
-
- map<unsigned long, unsigned long> allDepthHist;
- unsigned long totalLength = 0;
-
- // process each chromosome
- masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
- masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
- for (; chromItr != chromEnd; ++chromItr)
- {
- // for each chrom, process each bin
- binsToBedCovs::const_iterator binItr = chromItr->second.begin();
- binsToBedCovs::const_iterator binEnd = chromItr->second.end();
- for (; binItr != binEnd; ++binItr)
- {
- // for each chrom & bin, compute and report
- // the observed coverage for each feature
- vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
- vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
- for (; bedItr != bedEnd; ++bedItr)
- {
- int zeroDepthCount = 0; // number of bases with zero depth
- int depth = 0; // tracks the depth at the current base
-
- // the start is either the first base in the feature OR
- // the leftmost position of an overlapping feature.
- // e.g. (s = start):
- // A ----------
- // B s ------------
- int start = min(bedItr->minOverlapStart, bedItr->start);
-
- // track the number of bases in the feature covered by
- // 0, 1, 2, ... n features in A
- map<unsigned int, unsigned int> depthHist;
- map<unsigned int, DEPTH>::const_iterator depthItr;
-
- // compute the coverage observed at each base in
- // the feature marching from start to end.
- for (CHRPOS pos = start+1; pos <= bedItr->end; pos++)
- {
- // map pointer grabbing the starts and
- // ends observed at this position
- depthItr = bedItr->depthMap.find(pos);
- // increment coverage if starts observed at this position.
- if (depthItr != bedItr->depthMap.end())
- depth += depthItr->second.starts;
- // update coverage assuming the current position is
- // within the current B feature
- if ((pos > bedItr->start) && (pos <= bedItr->end)) {
- if (depth == 0) zeroDepthCount++;
- // update our histograms, assuming we are not
- // reporting "per-base" coverage.
- if (_eachBase == false) {
- depthHist[depth]++;
- allDepthHist[depth]++;
- }
- else if ((_eachBase == true) &&
- (bedItr->zeroLength == false))
- {
- _bedB->reportBedTab(*bedItr);
- printf("%d\t%d\n", pos-bedItr->start, depth);
- }
- }
- // decrement coverage if ends observed at this position.
- if (depthItr != bedItr->depthMap.end())
- depth = depth - depthItr->second.ends;
- }
-
- // handle the special case where the user wants "per-base" depth
- // but the current feature is length = 0.
- if ((_eachBase == true) && (bedItr->zeroLength == true)) {
- _bedB->reportBedTab(*bedItr);
- printf("1\t%d\n",depth);
- }
- // Summarize the coverage for the current interval,
- // assuming the user has not requested "per-base" coverage.
- else if (_eachBase == false)
- {
- CHRPOS length = bedItr->end - bedItr->start;
- if (bedItr->zeroLength == true) {
- length = 0;
- }
- totalLength += length;
- int nonZeroBases = (length - zeroDepthCount);
-
- float fractCovered = 0.0;
- if (bedItr->zeroLength == false) {
- fractCovered = (float) nonZeroBases / length;
- }
-
- // print a summary of the coverage
- if (_writeHistogram == false) {
- _bedB->reportBedTab(*bedItr);
- printf("%d\t%d\t%d\t%0.7f\n",
- bedItr->count, nonZeroBases,
- length, fractCovered);
- }
- // HISTOGRAM
- // report the number of bases with coverage == x
- else {
- // produce a histogram when not a zero length feature.
- if (bedItr->zeroLength == false) {
- map<unsigned int, unsigned int>::const_iterator
- histItr = depthHist.begin();
- map<unsigned int, unsigned int>::const_iterator
- histEnd = depthHist.end();
- for (; histItr != histEnd; ++histItr)
- {
- float fractAtThisDepth = (float)
- histItr->second / length;
-
- _bedB->reportBedTab(*bedItr);
- printf("%d\t%d\t%d\t%0.7f\n",
- histItr->first, histItr->second,
- length, fractAtThisDepth);
- }
- }
- // special case when it is a zero length feauture.
- else {
- _bedB->reportBedTab(*bedItr);
- printf("%d\t%d\t%d\t%0.7f\n",
- bedItr->count, 0, 0, 1.0000000);
- }
- }
- }
- }
- }
- }
- // report a histogram of coverage among _all_
- // features in B.
- if (_writeHistogram == true) {
- map<unsigned long, unsigned long>::const_iterator
- histItr = allDepthHist.begin();
- map<unsigned long, unsigned long>::const_iterator
- histEnd = allDepthHist.end();
- for (; histItr != histEnd; ++histItr) {
- float fractAtThisDepth = (float) histItr->second / totalLength;
- printf("all\t%lu\t%lu\t%lu\t%0.7f\n",
- histItr->first, histItr->second,
- totalLength, fractAtThisDepth);
- }
- }
-}
-
-
diff --git a/src/coverageBed/coverageBed.h b/src/coverageBed/coverageBed.h
deleted file mode 100644
index a3b6656..0000000
--- a/src/coverageBed/coverageBed.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*****************************************************************************
- coverageBed.h
-
- (c) 2009 - Aaron Quinlan
- Hall Laboratory
- Department of Biochemistry and Molecular Genetics
- University of Virginia
- aaronquinlan@gmail.com
-
- Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#ifndef COVERAGEBED_H
-#define COVERAGEBED_H
-
-#include "bedFile.h"
-
-#include "api/BamReader.h"
-#include "api/BamAux.h"
-#include "BlockedIntervals.h"
-using namespace BamTools;
-
-#include <vector>
-#include <algorithm>
-#include <iostream>
-#include <iomanip>
-#include <fstream>
-#include <stdlib.h>
-
-using namespace std;
-
-//************************************************
-// Class methods and elements
-//************************************************
-class BedCoverage {
-
-public:
-
- // constructor
- BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, bool writeHistogram,
- bool bamInput, bool obeySplits, bool eachBase, bool countsOnly);
-
- // destructor
- ~BedCoverage(void);
-
-private:
-
- // input files.
- string _bedAFile;
- string _bedBFile;
-
- // instance of a bed file class.
- BedFile *_bedA, *_bedB;
-
- // do we care about same or opposite strandedness when counting coverage?
- bool _sameStrand;
- bool _diffStrand;
-
- // should we write a histogram for each feature in B?
- bool _writeHistogram;
-
- // are we dealing with BAM input for "A"?
- bool _bamInput;
-
- // should we split BED/BAM into discrete blocks?
- bool _obeySplits;
-
- // should discrete coverage be reported for each base in each feature?
- bool _eachBase;
-
- // should we just count overlaps and not try to describe the breadth?
- bool _countsOnly;
-
- // private function for reporting coverage information
- void ReportCoverage();
-
- // private function for reporting overlap counts
- void ReportCounts();
-
- void CollectCoverageBed();
-
- void CollectCoverageBam(string bamFile);
-};
-#endif /* COVERAGEBED_H */
diff --git a/src/coverageBed/coverageMain.cpp b/src/coverageBed/coverageMain.cpp
deleted file mode 100644
index 05bd212..0000000
--- a/src/coverageBed/coverageMain.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-/*****************************************************************************
- coverageMain.cpp
-
- (c) 2009 - Aaron Quinlan
- Hall Laboratory
- Department of Biochemistry and Molecular Genetics
- University of Virginia
- aaronquinlan@gmail.com
-
- Licenced under the GNU General Public License 2.0 license.
-******************************************************************************/
-#include "coverageBed.h"
-#include "version.h"
-
-using namespace std;
-
-// define the version
-#define PROGRAM_NAME "bedtools coverage"
-
-// define our parameter checking macro
-#define PARAMETER_CHECK(param, paramLen, actualLen) (strncmp(argv[i], param, min(actualLen, paramLen))== 0) && (actualLen == paramLen)
-
-// function declarations
-void coverage_help(void);
-
-int coverage_main(int argc, char* argv[]) {
-
- // our configuration variables
- bool showHelp = false;
-
- // input files
- string bedAFile;
- string bedBFile;
-
- // parm flags
- bool sameStrand = false;
- bool diffStrand = false;
- bool writeHistogram = false;
- bool eachBase = false;
- bool obeySplits = false;
- bool bamInput = false;
- bool haveBedA = false;
- bool haveBedB = false;
- bool countsOnly = false;
-
- // check to see if we should print out some help
- if(argc <= 1) showHelp = true;
-
- for(int i = 1; i < argc; i++) {
- int parameterLength = (int)strlen(argv[i]);
-
- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
- (PARAMETER_CHECK("--help", 5, parameterLength))) {
- showHelp = true;
- }
- }
-
- if(showHelp) coverage_help();
-
- // do some parsing (all of these parameters require 2 strings)
- for(int i = 1; i < argc; i++) {
-
- int parameterLength = (int)strlen(argv[i]);
-
- if(PARAMETER_CHECK("-a", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveBedA = true;
- bedAFile = argv[i + 1];
- i++;
- }
- }
- else if(PARAMETER_CHECK("-abam", 5, parameterLength)) {
- if ((i+1) < argc) {
- haveBedA = true;
- bamInput = true;
- bedAFile = argv[i + 1];
- i++;
- }
- }
- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveBedB = true;
- bedBFile = argv[i + 1];
- i++;
- }
- }
- else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
- sameStrand = true;
- }
- else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
- diffStrand = true;
- }
- else if (PARAMETER_CHECK("-hist", 5, parameterLength)) {
- writeHistogram = true;
- }
- else if(PARAMETER_CHECK("-d", 2, parameterLength)) {
- eachBase = true;
- }
- else if (PARAMETER_CHECK("-split", 6, parameterLength)) {
- obeySplits = true;
- }
- else if (PARAMETER_CHECK("-counts", 7, parameterLength)) {
- countsOnly = true;
- }
- else {
- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
- showHelp = true;
- }
- }
-
- // make sure we have both input files
- if (!haveBedA || !haveBedB) {
- cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
- showHelp = true;
- }
-
- if (sameStrand && diffStrand) {
- cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
- showHelp = true;
- }
-
- if (!showHelp) {
- BedCoverage *bg = new BedCoverage(bedAFile, bedBFile, sameStrand, diffStrand,
- writeHistogram, bamInput, obeySplits, eachBase, countsOnly);
- delete bg;
- }
- else {
- coverage_help();
- }
- return 0;
-}
-
-void coverage_help(void) {
-
- cerr << "\nTool: bedtools coverage (aka coverageBed)" << endl;
- cerr << "Version: " << VERSION << "\n";
- cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl;
- cerr << "\t on the intervals in B." << endl << endl;
-
- cerr << "Usage: " << PROGRAM_NAME << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
-
- cerr << "Options: " << endl;
-
- cerr << "\t-abam\t" << "The A input file is in BAM format. Replaces -a." << endl << endl;
-
- cerr << "\t-s\t" << "Require same strandedness. That is, only counts hits in A that" << endl;
- cerr << "\t\toverlap B on the _same_ strand." << endl;
- cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
-
- cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in A" << endl;
- cerr << "\t\tthat overlap B on the _opposite_ strand." << endl;
- cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
-
- cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl;
- cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
- cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl;
- cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl;
-
- cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl;
- cerr << "\t\tPositions reported are one based. Each position" << endl;
- cerr << "\t\tand depth follow the complete B feature." << endl << endl;
-
- cerr << "\t-counts\t" << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl;
-
- cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl;
- cerr << "\t\twhen computing coverage." << endl;
- cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl;
- cerr << "\t\tto infer the blocks for computing coverage." << endl;
- cerr << "\t\tFor BED12 files, this uses the BlockCount, BlockStarts," << endl;
- cerr << "\t\tand BlockEnds fields (i.e., columns 10,11,12)." << endl << endl;
-
- cerr << "Default Output: " << endl;
- cerr << "\t" << " After each entry in B, reports: " << endl;
- cerr << "\t 1) The number of features in A that overlapped the B interval." << endl;
- cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl;
- cerr << "\t 3) The length of the entry in B." << endl;
- cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl;
-
- exit(1);
-}
diff --git a/src/coverageFile/Makefile b/src/coverageFile/Makefile
new file mode 100644
index 0000000..4af8d7e
--- /dev/null
+++ b/src/coverageFile/Makefile
@@ -0,0 +1,46 @@
+UTILITIES_DIR = ../utils/
+OBJ_DIR = ../../obj/
+BIN_DIR = ../../bin/
+TOOL_DIR = ../../src/
+
+INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
+ -I$(UTILITIES_DIR)/general/ \
+ -I$(UTILITIES_DIR)/fileType/ \
+ -I$(UTILITIES_DIR)/gzstream/ \
+ -I$(UTILITIES_DIR)/GenomeFile/ \
+ -I$(UTILITIES_DIR)/BamTools/include \
+ -I$(UTILITIES_DIR)/BamTools/src \
+ -I$(UTILITIES_DIR)/BlockedIntervals \
+ -I$(UTILITIES_DIR)/BamTools-Ancillary \
+ -I$(UTILITIES_DIR)/FileRecordTools/ \
+ -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
+ -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+ -I$(UTILITIES_DIR)/RecordOutputMgr/ \
+ -I$(UTILITIES_DIR)/KeyListOps/ \
+ -I$(UTILITIES_DIR)/NewChromsweep \
+ -I$(UTILITIES_DIR)/VectorOps \
+ -I$(UTILITIES_DIR)/BinTree \
+ -I$(UTILITIES_DIR)/version/ \
+ -I$(UTILITIES_DIR)/ToolBase/ \
+ -I$(TOOL_DIR)/intersectFile/
+
+# ----------------------------------
+# define our source and object files
+# ----------------------------------
+SOURCES= coverageHelp.cpp coverageFile.cpp coverageFile.h
+OBJECTS= coverageHelp.o coverageFile.o
+BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
+
+all: $(BUILT_OBJECTS)
+
+.PHONY: all
+
+$(BUILT_OBJECTS): $(SOURCES)
+ @echo " * compiling" $(*F).cpp
+ @$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(DFLAGS) $(INCLUDES)
+
+clean:
+ @echo "Cleaning up."
+ @rm -f $(OBJ_DIR)/coverageHelp.o $(OBJ_DIR)/coverageFile.o
+
+.PHONY: clean
diff --git a/src/coverageFile/coverageBed.cpp b/src/coverageFile/coverageBed.cpp
new file mode 100644
index 0000000..42ff265
--- /dev/null
+++ b/src/coverageFile/coverageBed.cpp
@@ -0,0 +1,341 @@
+/*****************************************************************************
+ coverageBed.cpp
+
+ (c) 2009 - Aaron Quinlan
+ Hall Laboratory
+ Department of Biochemistry and Molecular Genetics
+ University of Virginia
+ aaronquinlan@gmail.com
+
+ Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "lineFileUtilities.h"
+#include "coverageBed.h"
+
+// build
+BedCoverage::BedCoverage(string &bedAFile, string &bedBFile,
+ bool sameStrand, bool diffStrand,
+ bool writeHistogram, bool bamInput,
+ bool obeySplits, bool eachBase,
+ bool countsOnly)
+{
+
+ _bedAFile = bedAFile;
+ _bedBFile = bedBFile;
+
+ _bedA = new BedFile(bedAFile);
+ _bedB = new BedFile(bedBFile);
+
+ _sameStrand = sameStrand;
+ _diffStrand = diffStrand;
+ _obeySplits = obeySplits;
+ _eachBase = eachBase;
+ _writeHistogram = writeHistogram;
+ _bamInput = bamInput;
+ _countsOnly = countsOnly;
+
+
+ if (_bamInput == false)
+ CollectCoverageBed();
+ else
+ CollectCoverageBam(_bedA->bedFile);
+}
+
+// destroy
+BedCoverage::~BedCoverage(void) {
+ delete _bedA;
+ delete _bedB;
+}
+
+
+void BedCoverage::CollectCoverageBed() {
+
+ // load the "B" bed file into a map so
+ // that we can easily compare "A" to it for overlaps
+ _bedB->loadBedCovFileIntoMap();
+
+ BED a;
+ _bedA->Open();
+ // process each entry in A
+ while (_bedA->GetNextBed(a)) {
+ if (_bedA->_status == BED_VALID) {
+ // process the BED entry as a single block
+ if (_obeySplits == false)
+ _bedB->countHits(a, _sameStrand,
+ _diffStrand, _countsOnly);
+ // split the BED into discrete blocksand process
+ // each independently.
+ else {
+ bedVector bedBlocks;
+ GetBedBlocks(a, bedBlocks);
+ // use countSplitHits to avoid over-counting
+ // each split chunk as distinct read coverage.
+ _bedB->countSplitHits(bedBlocks, _sameStrand,
+ _diffStrand, _countsOnly);
+ }
+ }
+ }
+ _bedA->Close();
+
+ // report the coverage (summary or histogram) for BED B.
+ if (_countsOnly == true)
+ ReportCounts();
+ else
+ ReportCoverage();
+}
+
+
+void BedCoverage::CollectCoverageBam(string bamFile) {
+
+ // load the "B" bed file into a map so
+ // that we can easily compare "A" to it for overlaps
+ _bedB->loadBedCovFileIntoMap();
+
+ // open the BAM file
+ BamReader reader;
+ if (!reader.Open(bamFile)) {
+ cerr << "Failed to open BAM file " << bamFile << endl;
+ exit(1);
+ }
+ // get header & reference information
+ string header = reader.GetHeaderText();
+ RefVector refs = reader.GetReferenceData();
+
+ // convert each aligned BAM entry to BED
+ // and compute coverage on B
+ BamAlignment bam;
+ while (reader.GetNextAlignment(bam)) {
+ if (bam.IsMapped()) {
+ // treat the BAM alignment as a single "block"
+ if (_obeySplits == false) {
+ // construct a new BED entry from the current
+ // BAM alignment.
+ BED a;
+
+ try
+ {
+ a.chrom = refs.at(bam.RefID).RefName;
+ a.start = bam.Position;
+ a.end = bam.GetEndPosition(false, false);
+ a.strand = "+";
+ if (bam.IsReverseStrand()) a.strand = "-";
+
+ _bedB->countHits(a, _sameStrand,
+ _diffStrand, _countsOnly);
+ }
+ catch (out_of_range& oor)
+ {
+ cerr << bam.Name
+ << " is said to be mapped (0x4 == false), "
+ << "yet the chrom is missing. Skipping."
+ << endl;
+ }
+ }
+ // split the BAM alignment into discrete blocks and
+ // look for overlaps only within each block.
+ else {
+ // vec to store the discrete BED "blocks" from a
+ bedVector bedBlocks;
+ // since we are counting coverage, we do want
+ // to split blocks when a deletion (D) CIGAR op
+ // is encountered (hence the true for the last parm)
+ try
+ {
+ string chrom = refs.at(bam.RefID).RefName;
+ GetBamBlocks(bam, chrom, bedBlocks, false, true);
+ // use countSplitHits to avoid over-counting
+ // each split chunk as distinct read coverage.
+ _bedB->countSplitHits(bedBlocks, _sameStrand,
+ _diffStrand, _countsOnly);
+ }
+ catch (out_of_range& oor)
+ {
+ cerr << bam.Name
+ << " is said to be mapped (0x4 == false), "
+ << "yet the chrom is missing. Skipping."
+ << endl;
+ }
+ }
+ }
+ }
+ // report the coverage (summary or histogram) for BED B.
+ if (_countsOnly == true)
+ ReportCounts();
+ else
+ ReportCoverage();
+ // close the BAM file
+ reader.Close();
+}
+
+
+void BedCoverage::ReportCounts() {
+
+
+ // process each chromosome
+ masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
+ masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
+ for (; chromItr != chromEnd; ++chromItr)
+ {
+ // for each chrom, process each bin
+ binsToBedCovs::const_iterator binItr = chromItr->second.begin();
+ binsToBedCovs::const_iterator binEnd = chromItr->second.end();
+ for (; binItr != binEnd; ++binItr)
+ {
+ // for each chrom & bin, compute and report
+ // the observed coverage for each feature
+ vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
+ vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
+ for (; bedItr != bedEnd; ++bedItr)
+ {
+ _bedB->reportBedTab(*bedItr);
+ printf("%d\n", bedItr->count);
+ }
+ }
+ }
+}
+
+void BedCoverage::ReportCoverage() {
+
+ map<unsigned long, unsigned long> allDepthHist;
+ unsigned long totalLength = 0;
+
+ // process each chromosome
+ masterBedCovMap::const_iterator chromItr = _bedB->bedCovMap.begin();
+ masterBedCovMap::const_iterator chromEnd = _bedB->bedCovMap.end();
+ for (; chromItr != chromEnd; ++chromItr)
+ {
+ // for each chrom, process each bin
+ binsToBedCovs::const_iterator binItr = chromItr->second.begin();
+ binsToBedCovs::const_iterator binEnd = chromItr->second.end();
+ for (; binItr != binEnd; ++binItr)
+ {
+ // for each chrom & bin, compute and report
+ // the observed coverage for each feature
+ vector<BEDCOV>::const_iterator bedItr = binItr->second.begin();
+ vector<BEDCOV>::const_iterator bedEnd = binItr->second.end();
+ for (; bedItr != bedEnd; ++bedItr)
+ {
+ int zeroDepthCount = 0; // number of bases with zero depth
+ int depth = 0; // tracks the depth at the current base
+
+ // the start is either the first base in the feature OR
+ // the leftmost position of an overlapping feature.
+ // e.g. (s = start):
+ // A ----------
+ // B s ------------
+ int start = min(bedItr->minOverlapStart, bedItr->start);
+
+ // track the number of bases in the feature covered by
+ // 0, 1, 2, ... n features in A
+ map<unsigned int, unsigned int> depthHist;
+ map<unsigned int, DEPTH>::const_iterator depthItr;
+
+ // compute the coverage observed at each base in
+ // the feature marching from start to end.
+ for (CHRPOS pos = start+1; pos <= bedItr->end; pos++)
+ {
+ // map pointer grabbing the starts and
+ // ends observed at this position
+ depthItr = bedItr->depthMap.find(pos);
+ // increment coverage if starts observed at this position.
+ if (depthItr != bedItr->depthMap.end())
+ depth += depthItr->second.starts;
+ // update coverage assuming the current position is
+ // within the current B feature
+ if ((pos > bedItr->start) && (pos <= bedItr->end)) {
+ if (depth == 0) zeroDepthCount++;
+ // update our histograms, assuming we are not
+ // reporting "per-base" coverage.
+ if (_eachBase == false) {
+ depthHist[depth]++;
+ allDepthHist[depth]++;
+ }
+ else if ((_eachBase == true) &&
+ (bedItr->zeroLength == false))
+ {
+ _bedB->reportBedTab(*bedItr);
+ printf("%d\t%d\n", pos-bedItr->start, depth);
+ }
+ }
+ // decrement coverage if ends observed at this position.
+ if (depthItr != bedItr->depthMap.end())
+ depth = depth - depthItr->second.ends;
+ }
+
+ // handle the special case where the user wants "per-base" depth
+ // but the current feature is length = 0.
+ if ((_eachBase == true) && (bedItr->zeroLength == true)) {
+ _bedB->reportBedTab(*bedItr);
+ printf("1\t%d\n",depth);
+ }
+ // Summarize the coverage for the current interval,
+ // assuming the user has not requested "per-base" coverage.
+ else if (_eachBase == false)
+ {
+ CHRPOS length = bedItr->end - bedItr->start;
+ if (bedItr->zeroLength == true) {
+ length = 0;
+ }
+ totalLength += length;
+ int nonZeroBases = (length - zeroDepthCount);
+
+ float fractCovered = 0.0;
+ if (bedItr->zeroLength == false) {
+ fractCovered = (float) nonZeroBases / length;
+ }
+
+ // print a summary of the coverage
+ if (_writeHistogram == false) {
+ _bedB->reportBedTab(*bedItr);
+ printf("%d\t%d\t%d\t%0.7f\n",
+ bedItr->count, nonZeroBases,
+ length, fractCovered);
+ }
+ // HISTOGRAM
+ // report the number of bases with coverage == x
+ else {
+ // produce a histogram when not a zero length feature.
+ if (bedItr->zeroLength == false) {
+ map<unsigned int, unsigned int>::const_iterator
+ histItr = depthHist.begin();
+ map<unsigned int, unsigned int>::const_iterator
+ histEnd = depthHist.end();
+ for (; histItr != histEnd; ++histItr)
+ {
+ float fractAtThisDepth = (float)
+ histItr->second / length;
+
+ _bedB->reportBedTab(*bedItr);
+ printf("%d\t%d\t%d\t%0.7f\n",
+ histItr->first, histItr->second,
+ length, fractAtThisDepth);
+ }
+ }
+ // special case when it is a zero length feauture.
+ else {
+ _bedB->reportBedTab(*bedItr);
+ printf("%d\t%d\t%d\t%0.7f\n",
+ bedItr->count, 0, 0, 1.0000000);
+ }
+ }
+ }
+ }
+ }
+ }
+ // report a histogram of coverage among _all_
+ // features in B.
+ if (_writeHistogram == true) {
+ map<unsigned long, unsigned long>::const_iterator
+ histItr = allDepthHist.begin();
+ map<unsigned long, unsigned long>::const_iterator
+ histEnd = allDepthHist.end();
+ for (; histItr != histEnd; ++histItr) {
+ float fractAtThisDepth = (float) histItr->second / totalLength;
+ printf("all\t%lu\t%lu\t%lu\t%0.7f\n",
+ histItr->first, histItr->second,
+ totalLength, fractAtThisDepth);
+ }
+ }
+}
+
+
diff --git a/src/coverageFile/coverageBed.h b/src/coverageFile/coverageBed.h
new file mode 100644
index 0000000..a3b6656
--- /dev/null
+++ b/src/coverageFile/coverageBed.h
@@ -0,0 +1,83 @@
+/*****************************************************************************
+ coverageBed.h
+
+ (c) 2009 - Aaron Quinlan
+ Hall Laboratory
+ Department of Biochemistry and Molecular Genetics
+ University of Virginia
+ aaronquinlan@gmail.com
+
+ Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#ifndef COVERAGEBED_H
+#define COVERAGEBED_H
+
+#include "bedFile.h"
+
+#include "api/BamReader.h"
+#include "api/BamAux.h"
+#include "BlockedIntervals.h"
+using namespace BamTools;
+
+#include <vector>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <stdlib.h>
+
+using namespace std;
+
+//************************************************
+// Class methods and elements
+//************************************************
+class BedCoverage {
+
+public:
+
+ // constructor
+ BedCoverage(string &bedAFile, string &bedBFile, bool sameStrand, bool diffStrand, bool writeHistogram,
+ bool bamInput, bool obeySplits, bool eachBase, bool countsOnly);
+
+ // destructor
+ ~BedCoverage(void);
+
+private:
+
+ // input files.
+ string _bedAFile;
+ string _bedBFile;
+
+ // instance of a bed file class.
+ BedFile *_bedA, *_bedB;
+
+ // do we care about same or opposite strandedness when counting coverage?
+ bool _sameStrand;
+ bool _diffStrand;
+
+ // should we write a histogram for each feature in B?
+ bool _writeHistogram;
+
+ // are we dealing with BAM input for "A"?
+ bool _bamInput;
+
+ // should we split BED/BAM into discrete blocks?
+ bool _obeySplits;
+
+ // should discrete coverage be reported for each base in each feature?
+ bool _eachBase;
+
+ // should we just count overlaps and not try to describe the breadth?
+ bool _countsOnly;
+
+ // private function for reporting coverage information
+ void ReportCoverage();
+
+ // private function for reporting overlap counts
+ void ReportCounts();
+
+ void CollectCoverageBed();
+
+ void CollectCoverageBam(string bamFile);
+};
+#endif /* COVERAGEBED_H */
diff --git a/src/coverageFile/coverageFile.cpp b/src/coverageFile/coverageFile.cpp
new file mode 100644
index 0000000..57a6a24
--- /dev/null
+++ b/src/coverageFile/coverageFile.cpp
@@ -0,0 +1,203 @@
+/*
+ * coverageFile.cpp
+ *
+ * Created on: May 8, 2015
+ * Author: nek3d
+ */
+
+#include "coverageFile.h"
+
+CoverageFile::CoverageFile(ContextCoverage *context)
+: IntersectFile(context),
+ _depthArray(NULL),
+ _depthArrayCapacity(0),
+ _queryLen(0),
+ _totalQueryLen(0),
+ _queryOffset(0),
+ _floatValBuf(NULL)
+{
+ //allocate and initialize depth array.
+ //Use C's malloc and free becauase we want
+ //to be able to realloc.
+ //Not using vector because arrays are faster.
+ size_t newSize = sizeof(size_t) * DEFAULT_DEPTH_CAPACITY;
+ _depthArray = (size_t *)malloc(newSize);
+ memset(_depthArray, 0, newSize);
+ _depthArrayCapacity = DEFAULT_DEPTH_CAPACITY;
+
+ _floatValBuf = new char[floatValBufLen];
+}
+
+CoverageFile::~CoverageFile() {
+ free(_depthArray);
+ delete _floatValBuf;
+}
+
+
+void CoverageFile::processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits) {
+ makeDepthCount(hits);
+ _finalOutput.clear();
+
+ switch(upCast(_context)->getCoverageType()) {
+ case ContextCoverage::COUNT:
+ doCounts(outputMgr, hits);
+ break;
+
+ case ContextCoverage::PER_BASE:
+ doPerBase(outputMgr, hits);
+ break;
+
+ case ContextCoverage::HIST:
+ doHist(outputMgr, hits);
+ break;
+
+ case ContextCoverage::DEFAULT:
+ default:
+ doDefault(outputMgr, hits);
+ break;
+ }
+
+}
+
+void CoverageFile::cleanupHits(RecordKeyVector &hits) {
+ IntersectFile::cleanupHits(hits);
+ memset(_depthArray, 0, sizeof(size_t) * _queryLen);
+
+}
+
+void CoverageFile::giveFinalReport(RecordOutputMgr *outputMgr) {
+
+ //only give report for histogram option
+ if (upCast(_context)->getCoverageType() != ContextCoverage::HIST) {
+ return;
+ }
+
+ for (depthMapType::iterator iter = _finalDepthMap.begin(); iter != _finalDepthMap.end(); iter++) {
+ size_t depth = iter->first;
+ size_t basesAtDepth = iter->second;
+ float depthPct = (float)basesAtDepth / (float)_totalQueryLen;
+
+ _finalOutput = "all\t";
+ _finalOutput.append(depth);
+ _finalOutput.append("\t");
+ _finalOutput.append(basesAtDepth);
+ _finalOutput.append("\t");
+ _finalOutput.append(_totalQueryLen);
+ _finalOutput.append("\t");
+ format(depthPct);
+
+ outputMgr->printRecord(NULL, _finalOutput);
+ }
+
+}
+
+
+void CoverageFile::makeDepthCount(RecordKeyVector &hits) {
+ const Record *key = hits.getKey();
+ _queryOffset = key->getStartPos();
+ _queryLen = (size_t)(key->getEndPos() - _queryOffset);
+ _totalQueryLen += _queryLen;
+
+ //resize depth array if needed
+ if (_depthArrayCapacity < _queryLen) {
+ _depthArray = (size_t*)realloc(_depthArray, sizeof(size_t) * _queryLen);
+ _depthArrayCapacity = _queryLen;
+ memset(_depthArray, 0, sizeof(size_t) * _depthArrayCapacity);
+ }
+
+ //loop through hits, which may not be in sorted order, due to
+ //potential multiple databases, and increment the depth array as needed.
+ for (RecordKeyVector::const_iterator_type iter = hits.begin(); iter != hits.end(); iter = hits.next()) {
+ const Record *dbRec = *iter;
+ int dbStart = dbRec->getStartPos();
+ int dbEnd = dbRec->getEndPos();
+ int maxStart = max(_queryOffset, dbStart);
+ int minEnd = min(dbEnd, key->getEndPos());
+
+ for (int i=maxStart; i < minEnd; i++) {
+ _depthArray[i - _queryOffset]++;
+ }
+ }
+}
+
+
+size_t CoverageFile::countBasesAtDepth(size_t depth) {
+ size_t retCount = 0;
+ for (size_t i=0; i < _queryLen; i++) {
+ if (_depthArray[i] == depth) {
+ retCount++;
+ }
+ }
+ return retCount;
+}
+
+void CoverageFile::doCounts(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
+{
+ _finalOutput = hits.size();
+ outputMgr->printRecord(hits.getKey(), _finalOutput);
+}
+
+void CoverageFile::doPerBase(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
+{
+ //loop through all bases in query, printing full record and metrcis for each
+ const Record * queryRec = hits.getKey();
+ for (size_t i= 0; i < _queryLen; i++) {
+ _finalOutput = i +1;
+ _finalOutput.append("\t");
+ _finalOutput.append(_depthArray[i]);
+
+ outputMgr->printRecord(queryRec, _finalOutput);
+ }
+}
+
+void CoverageFile::doHist(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
+{
+ //make a map of depths to num bases with that depth
+
+ _currDepthMap.clear();
+ for (size_t i=0; i < _queryLen; i++) {
+ _currDepthMap[_depthArray[i]]++;
+ _finalDepthMap[_depthArray[i]]++;
+ }
+
+ for (depthMapType::iterator iter = _currDepthMap.begin(); iter != _currDepthMap.end(); iter++) {
+ size_t depth = iter->first;
+ size_t numBasesAtDepth = iter->second;
+ float coveredBases = (float)numBasesAtDepth / (float)_queryLen;
+
+ _finalOutput = depth;
+ _finalOutput.append("\t");
+ _finalOutput.append(numBasesAtDepth);
+ _finalOutput.append("\t");
+ _finalOutput.append(_queryLen);
+ _finalOutput.append("\t");
+ format(coveredBases);
+
+ outputMgr->printRecord(hits.getKey(), _finalOutput);
+ }
+
+}
+
+void CoverageFile::doDefault(RecordOutputMgr *outputMgr, RecordKeyVector &hits)
+{
+ size_t nonZeroBases = _queryLen - countBasesAtDepth(0);
+ float coveredBases = (float)nonZeroBases / (float)_queryLen;
+
+ _finalOutput = hits.size();
+ _finalOutput.append("\t");
+ _finalOutput.append(nonZeroBases);
+ _finalOutput.append("\t");
+ _finalOutput.append(_queryLen);
+ _finalOutput.append("\t");
+ format(coveredBases);
+
+ outputMgr->printRecord(hits.getKey(), _finalOutput);
+}
+
+void CoverageFile::format(float val)
+{
+ memset(_floatValBuf, 0, floatValBufLen);
+ sprintf(_floatValBuf, "%0.7f", val);
+ _finalOutput.append(_floatValBuf);
+}
+
diff --git a/src/coverageFile/coverageFile.h b/src/coverageFile/coverageFile.h
new file mode 100644
index 0000000..414f636
--- /dev/null
+++ b/src/coverageFile/coverageFile.h
@@ -0,0 +1,54 @@
+/*
+ * coverageFile.h
+ *
+ * Created on: May 8, 2015
+ * Author: nek3d
+ */
+
+#ifndef COVERAGEFILE_H_
+#define COVERAGEFILE_H_
+
+#include "intersectFile.h"
+#include "ContextCoverage.h"
+
+class CoverageFile : public IntersectFile {
+public:
+ CoverageFile(ContextCoverage *);
+ ~CoverageFile();
+ virtual void processHits(RecordOutputMgr *outputMgr, RecordKeyVector &hits);
+ virtual void cleanupHits(RecordKeyVector &hits);
+ virtual void giveFinalReport(RecordOutputMgr *outputMgr);
+
+
+protected:
+ QuickString _finalOutput;
+
+ size_t *_depthArray;
+ size_t _depthArrayCapacity;
+ size_t _queryLen;
+ size_t _totalQueryLen;
+ int _queryOffset;
+ static const int DEFAULT_DEPTH_CAPACITY = 1024;
+ char *_floatValBuf;
+ static const int floatValBufLen = 16;
+
+ typedef map<size_t, size_t> depthMapType;
+ depthMapType _currDepthMap;
+ depthMapType _finalDepthMap;
+
+ virtual ContextCoverage *upCast(ContextBase *context) { return static_cast<ContextCoverage*>(context); }
+ void makeDepthCount(RecordKeyVector &hits);
+
+ size_t countBasesAtDepth(size_t depth);
+
+ void doCounts(RecordOutputMgr *outputMgr, RecordKeyVector &hits);
+ void doPerBase(RecordOutputMgr *outputMgr, RecordKeyVector &hits);
+ void doHist(RecordOutputMgr *outputMgr, RecordKeyVector &hits);
+ void doDefault(RecordOutputMgr *outputMgr, RecordKeyVector &hits);
+
+ void format(float val);
+
+};
+
+
+#endif /* COVERAGEFILE_H_ */
diff --git a/src/coverageFile/coverageHelp.cpp b/src/coverageFile/coverageHelp.cpp
new file mode 100644
index 0000000..f683d67
--- /dev/null
+++ b/src/coverageFile/coverageHelp.cpp
@@ -0,0 +1,92 @@
+/*****************************************************************************
+ coverageMain.cpp
+
+ (c) 2009 - Aaron Quinlan
+ Hall Laboratory
+ Department of Biochemistry and Molecular Genetics
+ University of Virginia
+ aaronquinlan@gmail.com
+
+ Licenced under the GNU General Public License 2.0 license.
+******************************************************************************/
+#include "CommonHelp.h"
+
+
+void coverage_help(void) {
+
+ cerr << "\nTool: bedtools coverage (aka coverageBed)" << endl;
+ cerr << "Version: " << VERSION << "\n";
+ cerr << "Summary: Returns the depth and breadth of coverage of features from A" << endl;
+ cerr << "\t on the intervals in B." << endl << endl;
+
+ cerr << "Usage: " << "bedtools coverage" << " [OPTIONS] -a <bed/gff/vcf> -b <bed/gff/vcf>" << endl << endl;
+
+ cerr << "Options: " << endl;
+
+ cerr << "\t-abam\t" << "The A input file is in BAM format. Replaces -a." << endl << endl;
+
+ cerr << "\t-s\t" << "Require same strandedness. That is, only counts hits in A that" << endl;
+ cerr << "\t\toverlap B on the _same_ strand." << endl;
+ cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
+
+ cerr << "\t-S\t" << "Require different strandedness. That is, only report hits in A" << endl;
+ cerr << "\t\tthat overlap B on the _opposite_ strand." << endl;
+ cerr << "\t\t- By default, overlaps are counted without respect to strand." << endl << endl;
+
+ cerr << "\t-hist\t" << "Report a histogram of coverage for each feature in B" << endl;
+ cerr << "\t\tas well as a summary histogram for _all_ features in B." << endl << endl;
+ cerr << "\t\tOutput (tab delimited) after each feature in B:" << endl;
+ cerr << "\t\t 1) depth\n\t\t 2) # bases at depth\n\t\t 3) size of B\n\t\t 4) % of B at depth" << endl << endl;
+
+ cerr << "\t-d\t" << "Report the depth at each position in each B feature." << endl;
+ cerr << "\t\tPositions reported are one based. Each position" << endl;
+ cerr << "\t\tand depth follow the complete B feature." << endl << endl;
+
+ cerr << "\t-counts\t" << "Only report the count of overlaps, don't compute fraction, etc." << endl << endl;
+
+ cerr << "\t-split\t" << "Treat \"split\" BAM or BED12 entries as distinct BED intervals." << endl;
+ cerr << "\t\twhen computing coverage." << endl;
+ cerr << "\t\tFor BAM files, this uses the CIGAR \"N\" and \"D\" operations " << endl;
+ cerr << "\t\tto infer the blocks for computing coverage." << endl;
+ cerr << "\t\tFor BED12 files, this uses the BlockCount, BlockStarts," << endl;
+ cerr << "\t\tand BlockEnds fields (i.e., columns 10,11,12)." << endl << endl;
+
+
+ cerr << "\t-sorted\t" << "Use the \"chromsweep\" algorithm for sorted (-k1,1 -k2,2n) input." << endl << endl;
+
+ cerr << "\t-g\t" << "Provide a genome file to enforce consistent chromosome sort order" << endl;
+ cerr <<"\t\tacross input files. Only applies when used with -sorted option." << endl << endl;
+
+ cerr << "\t-header\t" << "Print the header from the A file prior to results." << endl << endl;
+
+ cerr << "\t-nobuf\t" << "Disable buffered output. Using this option will cause each line"<< endl;
+ cerr <<"\t\tof output to be printed as it is generated, rather than saved" << endl;
+ cerr <<"\t\tin a buffer. This will make printing large output files " << endl;
+
+ cerr <<"\t\tnoticeably slower, but can be useful in conjunction with" << endl;
+ cerr <<"\t\tother software tools and scripts that need to process one" << endl;
+ cerr <<"\t\tline of bedtools output at a time." << endl << endl;
+
+ cerr << "\t-names\t" << "When using multiple databases, provide an alias for each that" << endl;
+ cerr <<"\t\twill appear instead of a fileId when also printing the DB record." << endl << endl;
+
+ cerr << "\t-filenames" << "\tWhen using multiple databases, show each complete filename" << endl;
+ cerr <<"\t\t\tinstead of a fileId when also printing the DB record." << endl << endl;
+
+ cerr << "\t-sortout\t" << "When using multiple databases, sort the output DB hits" << endl;
+ cerr << "\t\t\tfor each record." << endl << endl;
+
+ cerr << "\t-nonamecheck\t" << "For sorted data, don't throw an error if the file has different naming conventions" << endl;
+ cerr << "\t\t\tfor the same chromosome. ex. \"chr1\" vs \"chr01\"." << endl << endl;
+
+ CommonHelp();
+
+ cerr << "Default Output: " << endl;
+ cerr << "\t" << " After each entry in B, reports: " << endl;
+ cerr << "\t 1) The number of features in A that overlapped the B interval." << endl;
+ cerr << "\t 2) The number of bases in B that had non-zero coverage." << endl;
+ cerr << "\t 3) The length of the entry in B." << endl;
+ cerr << "\t 4) The fraction of bases in B that had non-zero coverage." << endl << endl;
+
+ exit(1);
+}
diff --git a/src/intersectFile/intersectFile.cpp b/src/intersectFile/intersectFile.cpp
index c516c6c..5c5f10f 100644
--- a/src/intersectFile/intersectFile.cpp
+++ b/src/intersectFile/intersectFile.cpp
@@ -98,6 +98,7 @@ bool IntersectFile::nextUnsortedFind(RecordKeyVector &hits)
if (queryRecord == NULL) {
continue;
} else {
+ _context->testNameConventions(queryRecord);
hits.setKey(queryRecord);
_binTree->getHits(queryRecord, hits);
return true;
diff --git a/src/mapFile/Makefile b/src/mapFile/Makefile
index 55a9798..a3ac6d9 100644
--- a/src/mapFile/Makefile
+++ b/src/mapFile/Makefile
@@ -3,20 +3,6 @@ OBJ_DIR = ../../obj/
BIN_DIR = ../../bin/
TOOL_DIR = ../../src/
-# -------------------
-# define our includes
-# -------------------
-# INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-# -I$(UTILITIES_DIR)/gzstream/ \
-# -I$(UTILITIES_DIR)/genomeFile/ \
-# -I$(UTILITIES_DIR)/lineFileUtilities/ \
-# -I$(UTILITIES_DIR)/fileType/ \
-# -I$(UTILITIES_DIR)/BamTools/include \
-# -I$(UTILITIES_DIR)/BamTools-Ancillary \
-# -I$(UTILITIES_DIR)/chromsweep \
-# -I$(UTILITIES_DIR)/VectorOps \
-# -I$(UTILITIES_DIR)/version/
-
INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
diff --git a/src/mapFile/mapHelp.cpp b/src/mapFile/mapHelp.cpp
index ee81457..795799b 100644
--- a/src/mapFile/mapHelp.cpp
+++ b/src/mapFile/mapHelp.cpp
@@ -56,6 +56,7 @@ void map_help(void) {
cerr << "\t-nonamecheck\t" << "For sorted data, don't throw an error if the file has different naming conventions" << endl;
cerr << "\t\t\tfor the same chromosome. ex. \"chr1\" vs \"chr01\"." << endl << endl;
+ CommonHelp();
cerr << "Notes: " << endl;
cerr << "\t(1) Both input files must be sorted by chrom, then start." << endl << endl;
diff --git a/src/utils/Contexts/ContextBase.cpp b/src/utils/Contexts/ContextBase.cpp
index 27aa805..09f878b 100644
--- a/src/utils/Contexts/ContextBase.cpp
+++ b/src/utils/Contexts/ContextBase.cpp
@@ -650,7 +650,9 @@ bool ContextBase::parseIoBufSize(QuickString bufStr)
}
void ContextBase::testNameConventions(const Record *record) {
- if (getNameCheckDisabled() || _nameConventionWarningTripped) return;
+ //Do nothing if using the -nonamecheck option,
+ //warning already given, or record is unmapped BAM record
+ if (getNameCheckDisabled() || _nameConventionWarningTripped || record->isUnmapped()) return;
int fileIdx = record->getFileIdx();
diff --git a/src/utils/Contexts/ContextCoverage.cpp b/src/utils/Contexts/ContextCoverage.cpp
new file mode 100644
index 0000000..78f7cda
--- /dev/null
+++ b/src/utils/Contexts/ContextCoverage.cpp
@@ -0,0 +1,77 @@
+/*
+ * ContextCoverage.cpp
+ *
+ * Created on: May 8, 2015
+ * Author: nek3d
+ */
+
+#include "ContextCoverage.h"
+
+ContextCoverage::ContextCoverage()
+: _count(false),
+ _perBase(false),
+ _showHist(false),
+ _coverageType(DEFAULT)
+{
+
+}
+
+ContextCoverage::~ContextCoverage() {
+
+}
+
+bool ContextCoverage::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
+
+ for (_i=_skipFirstArgs; _i < argc; _i++) {
+ if (isUsed(_i - _skipFirstArgs)) {
+ continue;
+ }
+ if (strcmp(_argv[_i], "-d") == 0) {
+ if (!handle_d()) return false;
+ }
+ if (strcmp(_argv[_i], "-counts") == 0) {
+ if (!handle_c()) return false;
+ }
+ if (strcmp(_argv[_i], "-hist") == 0) {
+ if (!handle_hist()) return false;
+ }
+ }
+ return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
+}
+
+bool ContextCoverage::isValidState()
+{
+ if (!ContextIntersect::isValidState()) {
+ return false;
+ }
+ //Can only use one output option. Were two or more set?
+ if (((int)_count + (int)_perBase + (int)_showHist) > 1) {
+ _errorMsg = "\n***** ERROR: -counts, -d, and -hist are mutually exclusive options. *****";
+ return false;
+ }
+
+ return true;
+}
+
+bool ContextCoverage::handle_c()
+{
+ _count = true;
+ _coverageType = COUNT;
+ return ContextIntersect::handle_c();
+}
+
+bool ContextCoverage::handle_d()
+{
+ _perBase = true;
+ _coverageType = PER_BASE;
+ markUsed(_i - _skipFirstArgs);
+ return true;
+}
+
+bool ContextCoverage::handle_hist()
+{
+ _showHist = true;
+ _coverageType = HIST;
+ markUsed(_i - _skipFirstArgs);
+ return true;
+}
diff --git a/src/utils/Contexts/ContextCoverage.h b/src/utils/Contexts/ContextCoverage.h
new file mode 100644
index 0000000..5a7b6b0
--- /dev/null
+++ b/src/utils/Contexts/ContextCoverage.h
@@ -0,0 +1,40 @@
+/*
+ * ContextCoverage.h
+ *
+ * Created on: May 8, 2015
+ * Author: nek3d
+ */
+
+#ifndef CONTEXTCOVERAGE_H_
+#define CONTEXTCOVERAGE_H_
+
+
+#include "ContextIntersect.h"
+
+class ContextCoverage : public ContextIntersect {
+public:
+ ContextCoverage();
+ virtual ~ContextCoverage();
+ virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
+ virtual bool hasIntersectMethods() const { return true; }
+ virtual bool isValidState();
+
+ typedef enum { DEFAULT, COUNT, PER_BASE, HIST } coverageType;
+ coverageType getCoverageType() const { return _coverageType; }
+
+private:
+ bool _count;
+ bool _perBase;
+ bool _showHist;
+ coverageType _coverageType;
+
+ bool handle_c();
+ bool handle_d();
+ bool handle_hist();
+
+
+};
+
+
+
+#endif /* CONTEXTCOVERAGE_H_ */
diff --git a/src/utils/Contexts/ContextFisher.cpp b/src/utils/Contexts/ContextFisher.cpp
index 7e531e9..dc9ee41 100644
--- a/src/utils/Contexts/ContextFisher.cpp
+++ b/src/utils/Contexts/ContextFisher.cpp
@@ -21,13 +21,6 @@ bool ContextFisher::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
else if (strcmp(_argv[_i], "-exclude") == 0) {
if (!handle_exclude()) return false;
}
-// if (strcmp(_argv[_i], "-g") == 0) {
-// if (!handle_g()) return false;
-// }
-// if(strcmp(_argv[_i], "-m") == 0) {
-// markUsed(_i - _skipFirstArgs);
-// setUseMergedIntervals(true);
-// }
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
diff --git a/src/utils/Contexts/ContextIntersect.h b/src/utils/Contexts/ContextIntersect.h
index 5cdbfd5..32e42be 100644
--- a/src/utils/Contexts/ContextIntersect.h
+++ b/src/utils/Contexts/ContextIntersect.h
@@ -82,7 +82,7 @@ class ContextIntersect : public ContextBase {
virtual bool hasIntersectMethods() const { return true; }
-private:
+protected:
BlockMgr *_splitBlockMgr;
diff --git a/src/utils/Contexts/ContextMap.cpp b/src/utils/Contexts/ContextMap.cpp
index 1d79d7a..d72ee75 100644
--- a/src/utils/Contexts/ContextMap.cpp
+++ b/src/utils/Contexts/ContextMap.cpp
@@ -22,17 +22,6 @@ ContextMap::~ContextMap()
bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
- _argc = argc;
- _argv = argv;
- _skipFirstArgs = skipFirstArgs;
- if (_argc < 2) {
- setShowHelp(true);
- return false;
- }
-
- setProgram(_programNames[argv[0]]);
-
- _argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
diff --git a/src/utils/Contexts/ContextMerge.cpp b/src/utils/Contexts/ContextMerge.cpp
index 3da0d5d..5fee7a8 100644
--- a/src/utils/Contexts/ContextMerge.cpp
+++ b/src/utils/Contexts/ContextMerge.cpp
@@ -29,14 +29,6 @@ ContextMerge::~ContextMerge()
bool ContextMerge::parseCmdArgs(int argc, char **argv, int skipFirstArgs)
{
- _argc = argc;
- _argv = argv;
- _skipFirstArgs = skipFirstArgs;
-
- setProgram(_programNames[argv[0]]);
-
- _argsProcessed.resize(_argc - _skipFirstArgs, false);
-
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
diff --git a/src/utils/Contexts/ContextSample.cpp b/src/utils/Contexts/ContextSample.cpp
index 38fbcb3..132ba02 100644
--- a/src/utils/Contexts/ContextSample.cpp
+++ b/src/utils/Contexts/ContextSample.cpp
@@ -17,18 +17,6 @@ ContextSample::~ContextSample()
}
bool ContextSample::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
- _argc = argc;
- _argv = argv;
- _skipFirstArgs = skipFirstArgs;
- if (_argc < 2) {
- setShowHelp(true);
- return false;
- }
-
- setProgram(_programNames[argv[0]]);
-
- _argsProcessed.resize(_argc - _skipFirstArgs, false);
-
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
diff --git a/src/utils/Contexts/ContextSpacing.cpp b/src/utils/Contexts/ContextSpacing.cpp
index 1dc18c6..3cb8e7d 100644
--- a/src/utils/Contexts/ContextSpacing.cpp
+++ b/src/utils/Contexts/ContextSpacing.cpp
@@ -17,17 +17,6 @@ ContextSpacing::~ContextSpacing()
}
bool ContextSpacing::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
- _argc = argc;
- _argv = argv;
- _skipFirstArgs = skipFirstArgs;
- if (_argc < 2) {
- setShowHelp(true);
- return false;
- }
-
- setProgram(_programNames[argv[0]]);
-
- _argsProcessed.resize(_argc - _skipFirstArgs, false);
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
diff --git a/src/utils/Contexts/ContextSubtract.cpp b/src/utils/Contexts/ContextSubtract.cpp
index b0d2e61..830c9f5 100644
--- a/src/utils/Contexts/ContextSubtract.cpp
+++ b/src/utils/Contexts/ContextSubtract.cpp
@@ -23,18 +23,6 @@ ContextSubtract::~ContextSubtract()
bool ContextSubtract::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
- _argc = argc;
- _argv = argv;
- _skipFirstArgs = skipFirstArgs;
- if (_argc < 2) {
- setShowHelp(true);
- return false;
- }
-
- setProgram(_programNames[argv[0]]);
-
- _argsProcessed.resize(_argc - _skipFirstArgs, false);
-
for (_i=_skipFirstArgs; _i < argc; _i++) {
if (isUsed(_i - _skipFirstArgs)) {
continue;
@@ -48,7 +36,6 @@ bool ContextSubtract::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
if (strcmp(_argv[_i], "-N") == 0) {
if (!handle_N()) return false;
}
-
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
diff --git a/src/utils/Contexts/Makefile b/src/utils/Contexts/Makefile
index 5ae37ea..a3885ff 100644
--- a/src/utils/Contexts/Makefile
+++ b/src/utils/Contexts/Makefile
@@ -21,9 +21,9 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
# ----------------------------------
SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextFisher.cpp ContextFisher.h ContextMap.cpp \
ContextMap.h ContextSample.cpp ContextSpacing.cpp ContextSample.h ContextSpacing.h ContextMerge.h ContextMerge.cpp ContextJaccard.h ContextJaccard.cpp \
- ContextClosest.cpp ContextClosest.h ContextSubtract.cpp ContextSubtract.h
+ ContextClosest.cpp ContextClosest.h ContextSubtract.cpp ContextSubtract.h ContextCoverage.cpp ContextCoverage.h
OBJECTS= ContextBase.o ContextIntersect.o ContextFisher.o ContextMap.o ContextSample.o ContextSpacing.o ContextMerge.o ContextJaccard.o ContextClosest.o \
- ContextSubtract.o
+ ContextSubtract.o ContextCoverage.o
_EXT_OBJECTS=ParseTools.o QuickString.o
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
@@ -48,4 +48,5 @@ clean:
$(OBJ_DIR)/ContextJaccard.o \
$(OBJ_DIR)/ContextClosest.o \
$(OBJ_DIR)/ContextSubtract.o \
+ $(OBJ_DIR)/ContextCoverage.o
.PHONY: clean
diff --git a/src/utils/RecordOutputMgr/RecordOutputMgr.cpp b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp
index b4f488b..419e41f 100644
--- a/src/utils/RecordOutputMgr/RecordOutputMgr.cpp
+++ b/src/utils/RecordOutputMgr/RecordOutputMgr.cpp
@@ -126,9 +126,13 @@ void RecordOutputMgr::printRecord(const Record *record)
void RecordOutputMgr::printRecord(const Record *record, const QuickString & value)
{
_afterVal = value;
- printRecord(record);
+ bool recordPrinted = false;
+ if (record != NULL) {
+ printRecord(record);
+ recordPrinted = true;
+ }
if (!value.empty()) {
- tab();
+ if (recordPrinted) tab();
_outBuf.append(value);
}
newline();
diff --git a/src/utils/aux/BedtoolsDriver.cpp b/src/utils/aux/BedtoolsDriver.cpp
index 147b562..998a70e 100644
--- a/src/utils/aux/BedtoolsDriver.cpp
+++ b/src/utils/aux/BedtoolsDriver.cpp
@@ -10,7 +10,7 @@
#include "ContextClosest.h"
#include "ContextSubtract.h"
#include "ContextSpacing.h"
-//#include "ContextCoverage.h"
+#include "ContextCoverage.h"
//tools
#include "intersectFile.h"
@@ -22,6 +22,7 @@
#include "sampleFile.h"
#include "spacingFile.h"
#include "fisher.h"
+#include "coverageFile.h"
BedtoolsDriver::BedtoolsDriver() {
_supported.insert("intersect");
@@ -33,6 +34,7 @@ BedtoolsDriver::BedtoolsDriver() {
_supported.insert("sample");
_supported.insert("spacing");
_supported.insert("fisher");
+ _supported.insert("coverage");
}
@@ -100,6 +102,8 @@ ContextBase *BedtoolsDriver::getContext()
context = new ContextSpacing();
} else if (_subCmd == "fisher") {
context = new ContextFisher();
+ } else if (_subCmd == "coverage") {
+ context = new ContextCoverage();
} else {
cerr << "Error: Tool " << _subCmd << " is not supported. Exiting..." << endl;
exit(1);
@@ -128,6 +132,8 @@ ToolBase *BedtoolsDriver::getTool(ContextBase *context)
tool = new SpacingFile(static_cast<ContextSpacing *>(context));
} else if (_subCmd == "fisher") {
tool = new Fisher(static_cast<ContextFisher *>(context));
+ } else if (_subCmd == "coverage") {
+ tool = new CoverageFile(static_cast<ContextCoverage *>(context));
}
else {
diff --git a/src/utils/aux/Makefile b/src/utils/aux/Makefile
index c915a5e..e7d5fb5 100644
--- a/src/utils/aux/Makefile
+++ b/src/utils/aux/Makefile
@@ -36,7 +36,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
-I$(TOOLS_DIR)/closestFile/ \
-I$(TOOLS_DIR)/clusterBed/ \
-I$(TOOLS_DIR)/complementBed/ \
- -I$(TOOLS_DIR)/coverageBed/ \
+ -I$(TOOLS_DIR)/coverageFile/ \
-I$(TOOLS_DIR)/expand/ \
-I$(TOOLS_DIR)/fastaFromBed/ \
-I$(TOOLS_DIR)/flankBed/ \
diff --git a/src/utils/general/QuickString.cpp b/src/utils/general/QuickString.cpp
index 5f53615..2ff4b24 100644
--- a/src/utils/general/QuickString.cpp
+++ b/src/utils/general/QuickString.cpp
@@ -105,6 +105,12 @@ QuickString &QuickString::operator = (uint32_t val) {
return *this;
}
+QuickString &QuickString::operator = (size_t val) {
+ clear();
+ append(val);
+ return *this;
+}
+
QuickString &QuickString::operator = (float val) {
clear();
append(val);
@@ -152,6 +158,10 @@ QuickString &QuickString::operator += (uint32_t num) {
return *this;
}
+QuickString &QuickString::operator += (size_t num) {
+ append(num);
+ return *this;
+}
QuickString &QuickString::operator += (float num) {
append(num);
return *this;
@@ -254,6 +264,10 @@ void QuickString::append(uint32_t num) {
int2str((int)num, *this, true);
}
+void QuickString::append(size_t num) {
+ int2str((int)num, *this, true);
+}
+
void QuickString::append(float num) {
append(ToString(num));
}
diff --git a/src/utils/general/QuickString.h b/src/utils/general/QuickString.h
index 93f34bb..aee6b9a 100644
--- a/src/utils/general/QuickString.h
+++ b/src/utils/general/QuickString.h
@@ -38,6 +38,7 @@ class QuickString {
QuickString &operator = (char);
QuickString &operator = (int);
QuickString &operator = (uint32_t);
+ QuickString &operator = (size_t);
QuickString &operator = (float);
QuickString &operator = (double);
QuickString &operator += (const QuickString &);
@@ -46,6 +47,7 @@ class QuickString {
QuickString &operator += (char);
QuickString &operator += (int);
QuickString &operator += (uint32_t);
+ QuickString &operator += (size_t);
QuickString &operator += (float);
QuickString &operator += (double);
@@ -71,6 +73,7 @@ class QuickString {
//for better performance.
void append(int num);
void append(uint32_t num);
+ void append(size_t num);
void append(float num);
void append(double num);
diff --git a/test/intersect/test-intersect.sh b/test/intersect/test-intersect.sh
index b6263a0..262d315 100644
--- a/test/intersect/test-intersect.sh
+++ b/test/intersect/test-intersect.sh
@@ -537,16 +537,15 @@ check exp obs
rm exp obs
##################################################################
-# see that -nonamecheck only works for sorted data
-# NOTE: This test is now deprecated, as the -nonamecheck option
-# must also now work with unsorted data
+# see that naming conventions are tested with unsorted data.
##################################################################
echo " intersect.t44...\c"
-echo ok
-#"***** ERROR: -nonamecheck option is only valid for sorted input. *****" > exp
-#$BT intersect -a nonamecheck_a.bed -b nonamecheck_b.bed -nonamecheck 2>&1 > /dev/null | cat - | head -2 | tail -1 > obs
-#check exp obs
-#rm exp obs
+echo \
+"***** WARNING: File nonamecheck_a.bed has a record where naming convention (leading zero) is inconsistent with other files:
+chr1 10 20" > exp
+$BT intersect -a nonamecheck_a.bed -b nonamecheck_b.bed 2>&1 > /dev/null | cat - | head -2 > obs
+check exp obs
+rm exp obs
##################################################################