Python包-pysam

文件读写

文件读写句柄

bam_file : bam文件路径

读取文件

1
ReadBam = pysam.AlignmentFile(bam_file)  # 读取文件句柄

写入文件

1
2
3
4
5
6
7
WriteBam = pysam.AlignmentFile(bam_file , "wb", header=pysamFile.header)
for read in samfile.fetch():
if read.is_paired:
WriteBam.write(read)

pairedreads.close()
samfile.close()

Bam操作

提取特定区域的Reads

1
2
3
4
5
6
7
# 获取染色体chr1 上 100~120bp的reads
for read in samfile.fetch('chr1', 100, 120):
print(read)

samfile.close()

pileup(self, contig=None, start=None, stop=None, region=None, reference=None, end=None, **kwargs)

文件函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# region : 提取的区域
# mapq : 比对质量
# baseq :碱基质量
# ref :参考基因组



for pileup_column in bam_file.pileup(region="chr1:1-10",mapq=mapq , baseq = baseq,
stepper=stepper, fastaFile=ref, max_depth=200000, **{"truncate": True}):
#
for pileup_read in pileup_column.pileups:
aln = pileup_read.alignment
read_name = aln.query_name
pair = 'pe1' if aln.is_read1 else 'pe2'
strand = '-' if aln.is_reverse else '+'
read = Read(read_name, pair, strand)
if pileup_read.is_del or pileup_read.is_refskip or (aln.flag > 1024) or (aln.mapping_quality < mapq) or \
aln.query_qualities[pileup_read.query_position] < baseq:
continue
start_reads[read] = [pileup_read.query_position, aln]

Reference

readthedoc

-------------本文结束感谢您的阅读-------------