From 11355f28347000fbe4362d881eead0efcf67868c Mon Sep 17 00:00:00 2001 From: jacarey Date: Tue, 11 Apr 2017 15:09:15 -0400 Subject: [PATCH] Add CBCL parser (reader), which is not currently plugged into any CLPs but will be in future PRs. --- .../illumina/parser/readers/BaseBclReader.java | 160 +++++++++++ .../picard/illumina/parser/readers/BclReader.java | 85 +----- .../picard/illumina/parser/readers/CbclReader.java | 305 +++++++++++++++++++++ .../illumina/parser/readers/CbclReaderTest.java | 52 ++++ testdata/picard/illumina/readerTests/C100.1.cbcl | Bin 0 -> 125 bytes testdata/picard/illumina/readerTests/C99.1.cbcl | Bin 0 -> 125 bytes .../picard/illumina/readerTests/tile_1101.filter | Bin 0 -> 16 bytes .../picard/illumina/readerTests/tile_1102.filter | Bin 0 -> 16 bytes 8 files changed, 525 insertions(+), 77 deletions(-) create mode 100644 src/main/java/picard/illumina/parser/readers/BaseBclReader.java create mode 100644 src/main/java/picard/illumina/parser/readers/CbclReader.java create mode 100644 src/test/java/picard/illumina/parser/readers/CbclReaderTest.java create mode 100644 testdata/picard/illumina/readerTests/C100.1.cbcl create mode 100644 testdata/picard/illumina/readerTests/C99.1.cbcl create mode 100644 testdata/picard/illumina/readerTests/tile_1101.filter create mode 100644 testdata/picard/illumina/readerTests/tile_1102.filter diff --git a/src/main/java/picard/illumina/parser/readers/BaseBclReader.java b/src/main/java/picard/illumina/parser/readers/BaseBclReader.java new file mode 100644 index 000000000..30783722a --- /dev/null +++ b/src/main/java/picard/illumina/parser/readers/BaseBclReader.java @@ -0,0 +1,160 @@ +package picard.illumina.parser.readers; + +import htsjdk.samtools.Defaults; +import htsjdk.samtools.util.BlockCompressedInputStream; +import htsjdk.samtools.util.IOUtil; +import picard.PicardException; +import picard.illumina.parser.BclData; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; 
+import java.io.IOException; +import java.io.InputStream; +import java.util.zip.GZIPInputStream; + +class BaseBclReader { + private static final byte BASE_MASK = 0x0003; + private static final byte[] BASE_LOOKUP = new byte[]{'A', 'C', 'G', 'T'}; + final InputStream[] streams; + final File[] streamFiles; + final int[] outputLengths; + private BclQualityEvaluationStrategy bclQualityEvaluationStrategy; + final int[] numClustersPerCycle; + final int cycles; + + BaseBclReader(int[] outputLengths, BclQualityEvaluationStrategy bclQualityEvaluationStrategy) { + this.outputLengths = outputLengths; + this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy; + + int cycles = 0; + for (final int outputLength : outputLengths) { + cycles += outputLength; + } + + this.cycles = cycles; + this.streams = new InputStream[cycles]; + this.streamFiles = new File[cycles]; + this.numClustersPerCycle = new int[cycles]; + } + + BaseBclReader(int[] outputLengths) { + this.outputLengths = outputLengths; + + int cycles = 0; + for (final int outputLength : outputLengths) { + cycles += outputLength; + } + + this.cycles = cycles; + this.streams = new InputStream[cycles]; + this.streamFiles = new File[cycles]; + this.numClustersPerCycle = new int[cycles]; + } + + int getNumCycles() { + return cycles; + } + + int getNumClusters() { + return numClustersPerCycle[0]; + } + + InputStream open(final File file, final boolean seekable, final boolean isGzip, final boolean isBgzf) throws IOException { + final String filePath = file.getAbsolutePath(); + + try { + // Open up a buffered stream to read from the file and optionally wrap it in a gzip stream if necessary + if (isBgzf) { + // Only BlockCompressedInputStreams can seek, and only if they are fed a SeekableStream. 
+ return new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file)); + } else if (isGzip) { + if (seekable) { + throw new IllegalArgumentException( + String.format("Cannot create a seekable reader for gzip bcl: %s.", filePath) + ); + } + return (IOUtil.maybeBufferInputStream(new GZIPInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE / 2), + Defaults.BUFFER_SIZE / 2)); + } else { + if (seekable) { + throw new IllegalArgumentException( + String.format("Cannot create a seekable reader for provided bcl: %s.", filePath) + ); + } + return IOUtil.maybeBufferInputStream(new FileInputStream(file)); + } + } catch (final FileNotFoundException fnfe) { + throw new PicardException("File not found: (" + filePath + ")", fnfe); + } catch (final IOException ioe) { + throw new PicardException("Error reading file: (" + filePath + ")", ioe); + } + } + + void decodeBasecall(final BclData bclData, final int read, final int cycle, final int byteToDecode) { + if (byteToDecode == 0) { + bclData.bases[read][cycle] = (byte) '.'; + bclData.qualities[read][cycle] = (byte) 2; + } else { + bclData.bases[read][cycle] = BASE_LOOKUP[byteToDecode & BASE_MASK]; + bclData.qualities[read][cycle] = bclQualityEvaluationStrategy.reviseAndConditionallyLogQuality((byte) (byteToDecode >>> 2)); + } + } + + void decodeQualityBinnedBasecall(final BclData bclData, final int read, final int cycle, final int byteToDecode, + final CycleData cycleData) { + if (byteToDecode == 0) { + bclData.bases[read][cycle] = (byte) '.'; + bclData.qualities[read][cycle] = 2; + } else { + bclData.bases[read][cycle] = BASE_LOOKUP[byteToDecode & BASE_MASK]; + bclData.qualities[read][cycle] = cycleData.qualityBins[byteToDecode >>> 2]; + } + } + + class CycleData { + final short version; + final int headerSize; + + final byte bitsPerBasecall; + final byte bitsPerQualityScore; + final int numberOfBins; + final byte[] qualityBins; + + final int numTiles; + final TileData[] tileInfo; + final boolean pfExcluded; + + 
CycleData(final short version, final int headerSize, final byte bitsPerBasecall, + final byte bitsPerQualityScore, final int numberOfBins, final byte[] qualityBins, + final int numTiles, final TileData[] tileInfo, final boolean pfExcluded) { + this.version = version; + this.headerSize = headerSize; + this.bitsPerBasecall = bitsPerBasecall; + this.bitsPerQualityScore = bitsPerQualityScore; + this.numberOfBins = numberOfBins; + this.qualityBins = qualityBins; + this.numTiles = numTiles; + this.tileInfo = tileInfo; + this.pfExcluded = pfExcluded; + } + + + } + + + class TileData { + final int tileNum; + final int numClustersInTile; + final int uncompressedBlockSize; + final int compressedBlockSize; + + TileData(final int tileNum, final int numClustersInTile, final int uncompressedBlockSize, + final int compressedBlockSize) { + this.tileNum = tileNum; + this.numClustersInTile = numClustersInTile; + this.uncompressedBlockSize = uncompressedBlockSize; + this.compressedBlockSize = compressedBlockSize; + } + } +} diff --git a/src/main/java/picard/illumina/parser/readers/BclReader.java b/src/main/java/picard/illumina/parser/readers/BclReader.java index 06c6b804c..c7b268b89 100644 --- a/src/main/java/picard/illumina/parser/readers/BclReader.java +++ b/src/main/java/picard/illumina/parser/readers/BclReader.java @@ -23,7 +23,6 @@ */ package picard.illumina.parser.readers; -import htsjdk.samtools.Defaults; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; @@ -36,7 +35,6 @@ import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -72,32 +70,14 @@ *

* So the output base/quality will be a (T/34) */ -public class BclReader implements CloseableIterator { - private static final byte BASE_MASK = 0x0003; +public class BclReader extends BaseBclReader implements CloseableIterator { private static final int HEADER_SIZE = 4; - private static final byte[] BASE_LOOKUP = new byte[]{'A', 'C', 'G', 'T'}; - - private final InputStream[] streams; - private final File[] streamFiles; - private final int[] outputLengths; - int[] numClustersPerCycle; - - private final BclQualityEvaluationStrategy bclQualityEvaluationStrategy; - private BclData queue = null; + protected BclData queue = null; public BclReader(final List bclsForOneTile, final int[] outputLengths, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) { + super(outputLengths, bclQualityEvaluationStrategy); try { - this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy; - this.outputLengths = outputLengths; - - int cycles = 0; - for (final int outputLength : outputLengths) { - cycles += outputLength; - } - this.streams = new InputStream[cycles]; - this.streamFiles = new File[cycles]; - this.numClustersPerCycle = new int[cycles]; final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE); byteBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -173,14 +153,8 @@ private static long getNumberOfClusters(final String filePath, final InputStream public BclReader(final File bclFile, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean seekable) { + super(new int[]{1}, bclQualityEvaluationStrategy); try { - - this.outputLengths = new int[]{1}; - this.streams = new InputStream[1]; - this.streamFiles = new File[1]; - this.numClustersPerCycle = new int[]{1}; - this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy; - final ByteBuffer byteBuffer = ByteBuffer.allocate(HEADER_SIZE); final String filePath = bclFile.getName(); final boolean isGzip = filePath.endsWith(".gz"); @@ -195,7 +169,7 @@ public 
BclReader(final File bclFile, final BclQualityEvaluationStrategy bclQuali byteBuffer.order(ByteOrder.LITTLE_ENDIAN); this.numClustersPerCycle[0] = byteBuffer.getInt(); if (!isBgzf && !isGzip) { - assertProperFileStructure(bclFile, this.numClustersPerCycle[0], stream); + assertProperFileStructure(bclFile, this.getNumClusters(), stream); } this.streams[0] = stream; this.streamFiles[0] = bclFile; @@ -212,38 +186,6 @@ void assertProperFileStructure(final File file, final int numClusters, final Inp } } - InputStream open(final File file, final boolean seekable, final boolean isGzip, final boolean isBgzf) throws IOException { - final String filePath = file.getAbsolutePath(); - - try { - // Open up a buffered stream to read from the file and optionally wrap it in a gzip stream - // if necessary - if (isBgzf) { - // Only BlockCompressedInputStreams can seek, and only if they are fed a SeekableStream. - return new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file)); - } else if (isGzip) { - if (seekable) { - throw new IllegalArgumentException( - String.format("Cannot create a seekable reader for gzip bcl: %s.", filePath) - ); - } - return (IOUtil.maybeBufferInputStream(new GZIPInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE / 2), - Defaults.BUFFER_SIZE / 2)); - } else { - if (seekable) { - throw new IllegalArgumentException( - String.format("Cannot create a seekable reader for provided bcl: %s.", filePath) - ); - } - return IOUtil.maybeBufferInputStream(new FileInputStream(file)); - } - } catch (final FileNotFoundException fnfe) { - throw new PicardException("File not found: (" + filePath + ")", fnfe); - } catch (final IOException ioe) { - throw new PicardException("Error reading file: (" + filePath + ")", ioe); - } - } - public void close() { for (final InputStream stream : this.streams) { CloserUtil.close(stream); @@ -258,14 +200,10 @@ public boolean hasNext() { return queue != null; } - private long getNumClusters() { - return 
numClustersPerCycle[0]; - } - protected void assertProperFileStructure(final File file) { final long elementsInFile = file.length() - HEADER_SIZE; - if (numClustersPerCycle[0] != elementsInFile) { - throw new PicardException("Expected " + numClustersPerCycle[0] + " in file " + file.getAbsolutePath() + " but found " + elementsInFile); + if (getNumClusters() != elementsInFile) { + throw new PicardException("Expected " + getNumClusters() + " in file " + file.getAbsolutePath() + " but found " + elementsInFile); } } @@ -306,14 +244,7 @@ void advance() { return; } - if (readByte == 0) { - //NO CALL, don't confuse with an A call - data.bases[read][cycle] = (byte) '.'; - data.qualities[read][cycle] = (byte) 2; - } else { - data.bases[read][cycle] = BASE_LOOKUP[readByte & BASE_MASK]; - data.qualities[read][cycle] = bclQualityEvaluationStrategy.reviseAndConditionallyLogQuality((byte) (readByte >>> 2)); - } + decodeBasecall(data, read, cycle, readByte); totalCycleCount++; } catch (final IOException ioe) { throw new RuntimeIOException(ioe); diff --git a/src/main/java/picard/illumina/parser/readers/CbclReader.java b/src/main/java/picard/illumina/parser/readers/CbclReader.java new file mode 100644 index 000000000..8557a394a --- /dev/null +++ b/src/main/java/picard/illumina/parser/readers/CbclReader.java @@ -0,0 +1,305 @@ +package picard.illumina.parser.readers; + +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.ProgressLogger; +import htsjdk.samtools.util.RuntimeIOException; +import picard.illumina.parser.BclData; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +/** + * ------------------------------------- 
CBCL Header ----------------------------------- + * Bytes 0 - 1 Version number, current version is 1 unsigned 16 bits little endian integer + * Bytes 2 - 5 Header size unsigned 32 bits little endian integer + * Byte 6 Number of bits per basecall unsigned + * Byte 7 Number of bits per q-score unsigned + *

+ * q-val mapping info + * Bytes 0-3 Number of bins (B), zero indicates no mapping + * B pairs of 4 byte values (if B > 0) {from, to}, {from, to}, {from, to} … from: quality score bin to: quality score +

+ * Number of tile records unsigned 32 bits little endian integer +

+ * gzip virtual file offsets, one record per tile + * Bytes 0-3: tile number + * Bytes 4-7 Number of clusters that were written into the current block (required due to bit-packed q-scores) + * unsigned 32 bit integer + *

+ * Bytes 8-11 Uncompressed block size of the tile data (useful for sanity check when excluding non-PF clusters) + * unsigned 32 bit integer +

+ * Bytes 12-15 Compressed block size of the tile data unsigned 32 bit integer + *

+ * non-PF clusters excluded flag 1: non-PF clusters are excluded 0: non-PF clusters are included + *

+ * ------------------------------------- CBCL File Content ----------------------------------- + *

+ * N blocks of gzip files, where N is the number of tiles. + *

+ * Each block consists of C number of basecall, quality score pairs where C is the number of clusters for the given tile. + *

+ * Each basecall, quality score pair has the following format (assuming 2 bits are used for the basecalls): + * Bits 0-1: Basecalls (respectively [A, C, G, T] for [00, 01, 10, 11]) + * Bits 2 and up: Quality score (unsigned Q bit little endian integer where Q is the number of bits per q-score). + * For a two bit quality score, this is two clusters per byte where the bottom 4 bits are the first cluster and the + * higher 4 bits are the second cluster. + **/ + +public class CbclReader extends BaseBclReader implements CloseableIterator { + + private static Log log = Log.getInstance(CbclReader.class); + + private ByteBuffer[] cachedTile; + + private int[] currentTile; + + private List queue = new ArrayList<>(); + + private final CycleData[] cycleData; + private Map filters; + private Map cachedFilter = new HashMap<>(); + + void addFilters(Map filters) { + this.filters = filters; + } + + private static final int INITIAL_HEADER_SIZE = 6; + + public CbclReader(final List cbcls, final int[] outputLengths) { + super(outputLengths); + cycleData = new CycleData[cycles]; + cachedTile = new ByteBuffer[cycles]; + currentTile = new int[cycles]; + + try { + final ByteBuffer byteBuffer = ByteBuffer.allocate(INITIAL_HEADER_SIZE); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + for (int i = 0; i < cycles; i++) { + currentTile[i] = 0; + final File bclFile = cbcls.get(i); + + final InputStream stream = open(bclFile, false, false, false); + int read = stream.read(byteBuffer.array()); + + //we need to read the first 6 bytes to determine the header size + if (read != INITIAL_HEADER_SIZE) { + close(); + throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile())); + } + + short version = byteBuffer.getShort(); + int headerSize = byteBuffer.getInt(); + + final ByteBuffer headerBuffer = ByteBuffer.allocate(headerSize - INITIAL_HEADER_SIZE); + headerBuffer.order(ByteOrder.LITTLE_ENDIAN); + + read = stream.read(headerBuffer.array()); + if (read 
!= headerSize - INITIAL_HEADER_SIZE) { + close(); + throw new RuntimeIOException(String.format("BCL %s has invalid header structure.", bclFile.getAbsoluteFile())); + } + + byte bitsPerBasecall = headerBuffer.get(); + byte bitsPerQualityScore = headerBuffer.get(); + + if (bitsPerBasecall != 2 && bitsPerBasecall != bitsPerQualityScore) { + close(); + throw new RuntimeIOException("CBCL data not encoded in nibbles. (not currently supported)"); + } + + int numberOfBins = headerBuffer.getInt(); + + byte[] qualityBins = new byte[numberOfBins]; + //each bin has a pair of 4 byte mappings + for (int j = 0; j < numberOfBins; j++) { + headerBuffer.getInt(); // first int is "from" value, which we don't need + int to = headerBuffer.getInt(); + qualityBins[j] = (byte) to; + } + + int numTiles = headerBuffer.getInt(); + TileData[] tileInfo = new TileData[numTiles]; + for (int j = 0; j < numTiles; j++) { + int tileNum = headerBuffer.getInt(); + int numClustersInTile = headerBuffer.getInt(); + int uncompressedBlockSize = headerBuffer.getInt(); + int compressedBlockSize = headerBuffer.getInt(); + tileInfo[j] = new TileData(tileNum, numClustersInTile, uncompressedBlockSize, compressedBlockSize); + } + + boolean pfExcluded = headerBuffer.get() == 1; + cycleData[i] = new CycleData(version, headerSize, bitsPerBasecall, bitsPerQualityScore, numberOfBins, qualityBins, numTiles, tileInfo, pfExcluded); + this.streams[i] = stream; + this.streamFiles[i] = bclFile; + byteBuffer.clear(); + headerBuffer.clear(); + } + + + } catch (final IOException ioe) { + throw new RuntimeIOException(ioe); + } + } + + @Override + public boolean hasNext() { + if (queue.isEmpty()) { + advance(); + } + return !queue.isEmpty(); + } + + public BclData next() { + if (queue.isEmpty()) { + advance(); + } + + final BclData data = queue.get(0); + queue.remove(0); + return data; + } + + @Override + public void close() { + for (final InputStream stream : this.streams) { + CloserUtil.close(stream); + } + } + + private void 
advance() { + int totalCycleCount = 0; + BclData data = new BclData(outputLengths); + + for (int read = 0; read < outputLengths.length; read++) { + for (int cycle = 0; cycle < outputLengths[read]; cycle++) { + try { + CycleData currentCycleData = cycleData[totalCycleCount]; + TileData currentTileData = currentCycleData.tileInfo[currentTile[totalCycleCount]]; + try { + if (cachedTile[totalCycleCount] == null) { + if (!cachedFilter.containsKey(currentTileData.tileNum)) { + cacheFilter(currentTileData); + } + cacheTile(totalCycleCount, currentTileData, currentCycleData); + } + } catch (IOException e) { + // when logging the error, increment cycle by 1, since totalCycleCount is zero-indexed but Illumina directories are 1-indexed. + throw new IOException(String.format("Error while reading from BCL file for cycle %d. Offending file on disk is %s", + (totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath()), e); + } + + if (!cachedTile[totalCycleCount].hasRemaining()) { + //on to the next tile + currentTile[totalCycleCount]++; + //if past the last tile then return + if (currentTile[totalCycleCount] > currentCycleData.tileInfo.length - 1) { + return; + } + currentTileData = currentCycleData.tileInfo[currentTile[totalCycleCount]]; + if (!cachedFilter.containsKey(currentTileData.tileNum)) { + cacheFilter(currentTileData); + } + cacheTile(totalCycleCount, currentTileData, currentCycleData); + } + + int singleByte = cachedTile[totalCycleCount].get(); + + decodeQualityBinnedBasecall(data, read, cycle, singleByte, currentCycleData); + + totalCycleCount++; + } catch (final IOException ioe) { + throw new RuntimeIOException(ioe); + } + + } + } + this.queue.add(data); + } + + private void cacheFilter(TileData currentTileData) { + boolean[] filterValues = new boolean[currentTileData.numClustersInTile]; + FilterFileReader reader = new FilterFileReader(filters.get(currentTileData.tileNum)); + int count = 0; + while (reader.hasNext()) { + filterValues[count] = 
reader.next(); + count++; + } + cachedFilter.put(currentTileData.tileNum, filterValues); + } + + private void cacheTile(int totalCycleCount, TileData tileData, CycleData currentCycleData) throws IOException { + ByteBuffer tileByteBuffer = ByteBuffer.allocate(tileData.compressedBlockSize); + //we are going to explode the nibbles in to bytes to make PF filtering easier + ByteBuffer tempUncompressedByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize); + ByteBuffer uncompressedByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize); + ByteBuffer unNibbledByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize * 2); + + tileByteBuffer.order(ByteOrder.LITTLE_ENDIAN); + uncompressedByteBuffer.order(ByteOrder.LITTLE_ENDIAN); + + // Read the whole compressed block into a buffer, then sanity check the length + int readBytes = this.streams[totalCycleCount].read(tileByteBuffer.array()); + if (readBytes != tileData.compressedBlockSize) { + throw new IOException(String.format("Error while reading from BCL file for cycle %d. 
Offending file on disk is %s", + (totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath())); + } + + // Uncompress the data from the buffer we just wrote - use gzip input stream to write to uncompressed buffer + ByteArrayInputStream byteInputStream = new ByteArrayInputStream(tileByteBuffer.array()); + if (cachedTile[totalCycleCount] != null) { + //clear the old cache + cachedTile[totalCycleCount] = null; + } + GZIPInputStream gzipInputStream = new GZIPInputStream(byteInputStream, uncompressedByteBuffer.capacity()); + int read = 0; + int totalRead = 0; + while ((read = gzipInputStream.read(tempUncompressedByteBuffer.array(), 0, tempUncompressedByteBuffer.capacity())) != -1) { + uncompressedByteBuffer.put(tempUncompressedByteBuffer.array(), 0, read); + totalRead += read; + } + if (totalRead != tileData.uncompressedBlockSize) { + throw new IOException(String.format("Error while decompressing from BCL file for cycle %d. Offending file on disk is %s", + (totalCycleCount + 1), this.streamFiles[totalCycleCount].getAbsolutePath())); + } + + // Read uncompressed data from the buffer and expand each nibble into a full byte for ease of use + uncompressedByteBuffer.flip(); + while (uncompressedByteBuffer.hasRemaining()) { + byte singleByte = uncompressedByteBuffer.get(); + unNibbledByteBuffer.put((byte) (singleByte & 0x0f)); + unNibbledByteBuffer.put((byte) ((singleByte >> 4) & 0x0f)); + } + gzipInputStream.close(); + + // Write buffer contents to cached tile array + unNibbledByteBuffer.flip(); + //if nonPF reads are included we need to strip them out + if (!currentCycleData.pfExcluded) { + ByteBuffer uncompressedFilteredByteBuffer = ByteBuffer.allocate(tileData.uncompressedBlockSize * 2); + FilterFileReader reader = new FilterFileReader(filters.get(tileData.tileNum)); + while (reader.hasNext()) { + byte readByte = unNibbledByteBuffer.get(); + if (reader.next()) { + uncompressedFilteredByteBuffer.put(readByte); + } + } + uncompressedFilteredByteBuffer.flip(); 
+ cachedTile[totalCycleCount] = uncompressedFilteredByteBuffer.slice(); + } else { + cachedTile[totalCycleCount] = unNibbledByteBuffer; + } + } + +} diff --git a/src/test/java/picard/illumina/parser/readers/CbclReaderTest.java b/src/test/java/picard/illumina/parser/readers/CbclReaderTest.java new file mode 100644 index 000000000..80f9956f8 --- /dev/null +++ b/src/test/java/picard/illumina/parser/readers/CbclReaderTest.java @@ -0,0 +1,52 @@ +package picard.illumina.parser.readers; + +import org.testng.Assert; +import org.testng.annotations.Test; +import picard.illumina.parser.BclData; + +import java.io.File; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +public class CbclReaderTest { + + public static final File TestDataDir = new File("testdata/picard/illumina/readerTests"); + public static final File PASSING_CBCL_C99_1 = new File(TestDataDir, "C99.1.cbcl"); + public static final File PASSING_CBCL_C100_1 = new File(TestDataDir, "C100.1.cbcl"); + public static final File TILE_1101_FILTER = new File(TestDataDir, "tile_1101.filter"); + public static final File TILE_1102_FILTER = new File(TestDataDir, "tile_1102.filter"); + + public static final char[] expectedBases = new char[]{ + 'G','G','C','C','G','A','A','G','T','A','C','C','C','T','G','A' + }; + + public static final int[] expectedQuals = new int[]{ + 37,37,37,37,37,37,37,12,37,37,37,37,37,37,12,37 + }; + + @Test + public void testReadValidFile() { + final CbclReader reader = new CbclReader(Arrays.asList(PASSING_CBCL_C99_1, PASSING_CBCL_C100_1), new int[] {2}); + Map filters = new HashMap<>(); + filters.put(1101, TILE_1101_FILTER); + filters.put(1102, TILE_1102_FILTER); + reader.addFilters(filters); + + int i = 0; + while (reader.hasNext()) { + final BclData bv = reader.next(); + for (int cluster = 0; cluster < bv.bases.length; cluster++) { + for (int cycle = 0; cycle < bv.bases[cluster].length; cycle++) { + String actual = new String(new byte[]{bv.bases[cluster][cycle]}); + String 
expected = new String(new char[]{expectedBases[i]}); + Assert.assertEquals(actual, expected, "For cluster " + cluster + " cycle " + cycle + ","); + Assert.assertEquals(bv.qualities[cluster][cycle], expectedQuals[i], "For cluster " + cluster + " cycle " + cycle + ","); + i++; + } + } + } + Assert.assertEquals(i, expectedBases.length); + reader.close(); + } +} diff --git a/testdata/picard/illumina/readerTests/C100.1.cbcl b/testdata/picard/illumina/readerTests/C100.1.cbcl new file mode 100644 index 0000000000000000000000000000000000000000..f776a938e9591da7310caefdaea97c9bf821f267 GIT binary patch literal 125 zcmZQ%2xMSjU}9nck}$vsWbgnn6A+67F*6XWLixTxg&?&cIWZviL*p~bcXKeDss9?m b!n}JrGlT5)G%1i$B++6yhO_Sd=^#-6v1txW literal 0 HcmV?d00001 diff --git a/testdata/picard/illumina/readerTests/C99.1.cbcl b/testdata/picard/illumina/readerTests/C99.1.cbcl new file mode 100644 index 0000000000000000000000000000000000000000..002cd52e5db6e5e21f828cd3d72e9b2ddf889e2c GIT binary patch literal 125 zcmZQ%2xMSjU}9nck}$vsWbgnn6A+67F*6XWLixTxg&?&cIWZviL*p~bcXKeDss9?m b!o0hjkKv~1J|&P+B+>cY45zMDm4HM6z3C4w literal 0 HcmV?d00001 diff --git a/testdata/picard/illumina/readerTests/tile_1101.filter b/testdata/picard/illumina/readerTests/tile_1101.filter new file mode 100644 index 0000000000000000000000000000000000000000..445bc582ee821157d8d3247869e1991a55bdd29c GIT binary patch literal 16 ScmZQzU|?VdViq7~1Ofm6UH}XL literal 0 HcmV?d00001 diff --git a/testdata/picard/illumina/readerTests/tile_1102.filter b/testdata/picard/illumina/readerTests/tile_1102.filter new file mode 100644 index 0000000000000000000000000000000000000000..445bc582ee821157d8d3247869e1991a55bdd29c GIT binary patch literal 16 ScmZQzU|?VdViq7~1Ofm6UH}XL literal 0 HcmV?d00001