Source code for deeptools.writeBedGraph_bam_and_bw

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import shutil
import tempfile
import numpy as np

# NGS packages
import pyBigWig

# own module
import mapReduce
from utilities import getCommonChrNames
from writeBedGraph import *
from deeptools import bamHandler

old_settings = np.seterr(all='ignore')


[docs]def getCoverageFromBigwig(bigwigHandle, chrom, start, end, tileSize, missingDataAsZero=False): try: coverage = np.asarray(bigwigHandle.values(chrom, start, end)) except TypeError: # this error happens when chromosome # is not into the bigwig file return [] if coverage is None: return [] if missingDataAsZero is True: coverage[np.isnan(coverage)] = 0 # average the values per bin cov = np.array( [np.mean(coverage[x:x + tileSize]) for x in range(0, len(coverage), tileSize)]) return cov
[docs]def writeBedGraph_wrapper(args): return writeBedGraph_worker(*args)
[docs]def writeBedGraph_worker( chrom, start, end, tileSize, defaultFragmentLength, bamOrBwFileList, func, funcArgs, extendPairedEnds=True, smoothLength=0, missingDataAsZero=False, fixed_step=False): r""" Writes a bedgraph having as base a number of bam files. The given func is called to compute the desired bedgraph value using the funcArgs tileSize """ if start > end: raise NameError("start position ({0}) bigger than " "end position ({1})".format(start, end)) coverage = [] for indexFile, fileFormat in bamOrBwFileList: if fileFormat == 'bam': bamHandle = bamHandler.openBam(indexFile) coverage.append(getCoverageFromBam( bamHandle, chrom, start, end, tileSize, defaultFragmentLength, extendPairedEnds, True)) bamHandle.close() elif fileFormat == 'bigwig': bigwigHandle = pyBigWig.open(indexFile) coverage.append( getCoverageFromBigwig( bigwigHandle, chrom, start, end, tileSize, missingDataAsZero)) bigwigHandle.close() # is /dev/shm available? # working in this directory speeds the process try: _file = tempfile.NamedTemporaryFile(dir="/dev/shm", delete=False) except OSError: _file = tempfile.NamedTemporaryFile(delete=False) previousValue = None lengthCoverage = len(coverage[0]) for tileIndex in xrange(lengthCoverage): tileCoverage = [] for index in range(len(bamOrBwFileList)): if smoothLength > 0: vectorStart, vectorEnd = getSmoothRange( tileIndex, tileSize, smoothLength, lengthCoverage) tileCoverage.append( np.mean(coverage[index][vectorStart:vectorEnd])) else: try: tileCoverage.append(coverage[index][tileIndex]) except IndexError: print "Chromosome {} probably not in one of the bigwig " \ "files. Remove this chromosome from the bigwig file " \ "to continue".format(chrom) exit(0) # if zerosToNans == True and sum(tileCoverage) == 0.0: # continue value = func(tileCoverage, funcArgs) if fixed_step: writeStart = start + tileIndex * tileSize writeEnd = min(writeStart + tileSize, end) try: _file.write("%s\t%d\t%d\t%.2f\n" % (chrom, writeStart, writeEnd, value)) except TypeError: _file.write("{}\t{}\t{}\t{}\n".format(chrom, writeStart, writeEnd, value)) else: if previousValue is None: writeStart = start + tileIndex * tileSize writeEnd = min(writeStart + tileSize, end) previousValue = value elif previousValue == value: writeEnd = min(writeEnd + tileSize, end) elif previousValue != value: if not np.isnan(previousValue): _file.write( "%s\t%d\t%d\t%.2f\n" % (chrom, writeStart, writeEnd, previousValue)) previousValue = value writeStart = writeEnd writeEnd = min(writeStart + tileSize, end) if not fixed_step: # write remaining value if not a nan if previousValue and writeStart != end and \ not np.isnan(previousValue): _file.write("%s\t%d\t%d\t%.1f\n" % (chrom, writeStart, end, previousValue)) # """ tempFileName = _file.name _file.close() return(tempFileName)
[docs]def writeBedGraph( bamOrBwFileList, outputFileName, fragmentLength, func, funcArgs, tileSize=25, region=None, blackListFileName=None, numberOfProcessors=None, format="bedgraph", extendPairedEnds=True, missingDataAsZero=False, smoothLength=0, fixed_step=False): r""" Given a list of bamfiles, a function and a function arguments, this method writes a bedgraph file (or bigwig) file for a partition of the genome into tiles of given size and a value for each tile that corresponds to the given function and that is related to the coverage underlying the tile. """ bamHandlers = [bamHandler.openBam(indexedFile) for indexedFile, fileFormat in bamOrBwFileList if fileFormat == 'bam'] if len(bamHandlers): genomeChunkLength = getGenomeChunkLength(bamHandlers, tileSize) # check if both bam files correspond to the same species # by comparing the chromosome names: chromNamesAndSize, __ = getCommonChrNames(bamHandlers, verbose=False) else: genomeChunkLength = int(10e6) bigwigs = [fileName for fileName, fileFormat in bamOrBwFileList if fileFormat == 'bigwig'] cCommon = [] chromNamesAndSize = {} for bw in bigwigs: bwh = pyBigWig.open(bw) for chromName, size in bwh.chroms().items(): if chromName in chromNamesAndSize: cCommon.append(chromName) if chromNamesAndSize[chromName] != size: print "\nWARNING\n" \ "Chromosome {} length reported in the " \ "bigwig files differ.\n{} for {}\n" \ "{} for {}.\n\nThe smallest " \ "length will be used".format( chromName, chromNamesAndSize[chromName], bigwigs[0], size, bw) chromNamesAndSize[chromName] = min( chromNamesAndSize[chromName], size) else: chromNamesAndSize[chromName] = size bwh.close() # get the list of common chromosome names and sizes chromNamesAndSize = [(k, v) for k, v in chromNamesAndSize.iteritems() if k in cCommon] if region: # in case a region is used, append the tilesize region += ":{}".format(tileSize) res = mapReduce.mapReduce((tileSize, fragmentLength, bamOrBwFileList, func, funcArgs, extendPairedEnds, smoothLength, missingDataAsZero, fixed_step), writeBedGraph_wrapper, chromNamesAndSize, genomeChunkLength=genomeChunkLength, region=region, blackListFileName=blackListFileName, numberOfProcessors=numberOfProcessors) # concatenate intermediary bedgraph files outFile = open(outputFileName + ".bg", 'wb') for tempFileName in res: if tempFileName: # concatenate all intermediate tempfiles into one # bedgraph file shutil.copyfileobj(open(tempFileName, 'rb'), outFile) os.remove(tempFileName) bedGraphFile = outFile.name outFile.close() if format == 'bedgraph': os.rename(bedGraphFile, outputFileName) if debug: print "output file: %s" % (outputFileName) else: bedGraphToBigWig( chromNamesAndSize, bedGraphFile, outputFileName, True) if debug: print "output file: %s" % (outputFileName) os.remove(bedGraphFile)