|
#!/usr/bin python
|
|
|
|
import gzip
|
|
|
|
def is_header(line):
|
|
return line.startswith('#')
|
|
|
|
def get_GT(line):
|
|
fields = line.strip().split('\t')
|
|
FORMAT, sample = fields[8], fields[9]
|
|
FORMAT_fields = FORMAT.split(':')
|
|
sample_fields = sample.split(':')
|
|
|
|
GT_index = FORMAT_fields.index('GT')
|
|
GT = sample_fields[GT_index]
|
|
return GT
|
|
|
|
def is_phased_het(line):
|
|
GT = get_GT(line)
|
|
|
|
if '|' in GT:
|
|
GT_fields = GT.split('|')
|
|
return GT_fields[0] != GT_fields[1]
|
|
else:
|
|
return False
|
|
|
|
def is_phase_switch(bline, cline):
|
|
bGT = get_GT(bline)
|
|
cGT = get_GT(cline)
|
|
|
|
if '|' in bGT and '|' in cGT:
|
|
bGT_fields = bGT.split('|')
|
|
cGT_fields = cGT.split('|')
|
|
if bGT_fields[0] != bGT_fields[1] and cGT_fields[0] != cGT_fields[1]:
|
|
return bGT_fields[0] == cGT_fields[1] and bGT_fields[1] == cGT_fields[0]
|
|
else:
|
|
return False
|
|
else:
|
|
return False
|
|
|
|
def print_switch_errors(bfile, cfile):
|
|
b = gzip.open(bfile) if bfile.endswith(".gz") else open(bfile)
|
|
c = gzip.open(cfile) if cfile.endswith(".gz") else open(cfile)
|
|
|
|
bline = b.readline()
|
|
cline = c.readline()
|
|
|
|
last_het_is_switch = False
|
|
|
|
while not (bline == '' or cline == ''):
|
|
if is_header(bline):
|
|
bline = b.readline()
|
|
elif is_header(cline):
|
|
cline = c.readline()
|
|
else:
|
|
bfields = bline.strip().split('\t')
|
|
cfields = cline.strip().split('\t')
|
|
bCHROM, bPOS = bfields[0], int(bfields[1])
|
|
cCHROM, cPOS = cfields[0], int(cfields[1])
|
|
if bCHROM < cCHROM or (bCHROM == cCHROM and bPOS < cPOS):
|
|
if is_phased_het(bline):
|
|
if last_het_is_switch:
|
|
print bline,
|
|
last_het_is_switch = False
|
|
bline = b.readline()
|
|
elif bCHROM == cCHROM and bPOS == cPOS:
|
|
if is_phased_het(bline):
|
|
if last_het_is_switch:
|
|
if not is_phase_switch(bline, cline):
|
|
print bline,
|
|
last_het_is_switch = False
|
|
else:
|
|
if is_phase_switch(bline, cline):
|
|
print bline,
|
|
last_het_is_switch = True
|
|
bline = b.readline()
|
|
cline = c.readline()
|
|
else:
|
|
cline = c.readline()
|
|
|
|
b.close()
|
|
c.close()
|
|
|
|
def main():
|
|
import argparse
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument('bfile', metavar='BFILE', help='baseline file')
|
|
parser.add_argument('cfile', metavar='CFILE', help='calls file')
|
|
args = parser.parse_args()
|
|
|
|
print_switch_errors(args.bfile, args.cfile)
|
|
|
|
if __name__ == '__main__':
|
|
main()
|