Source code for scripts.parseFastaNames

import fnmatch
import os
import argparse
import sys



def _read_lineages(pangolin_path):
    """Return a dict of sequence name -> lineage from a pangolin CSV report.

    The name is taken from the first comma-separated column with every '|'
    replaced by '_' (so it matches the renamed FASTA ids); the lineage is
    taken from the second column. Rows with fewer than two columns are
    skipped.
    """
    lineages = {}
    with open(pangolin_path, "r") as report:
        for row in report:
            fields = row.strip().split(",")
            if len(fields) < 2:
                # Blank or truncated row: nothing to record.
                continue
            lineages[fields[0].replace("|", "_")] = fields[1]
    return lineages


def _write_record(name, seq_lines, lineages, out_fasta, out_meta):
    """Write one FASTA record and its metadata row unless it is filtered out.

    name       -- original sequence id (header line without the leading '>')
    seq_lines  -- raw sequence lines, newlines included, as read from input
    lineages   -- dict of renamed id -> lineage (may be empty)
    out_fasta  -- writable text handle for the renamed FASTA
    out_meta   -- writable text handle for the metadata table
    """
    # Records whose id mentions 'Wuhan' are excluded from both outputs.
    if "Wuhan" in name:
        return

    date = name.split("|")[-1]  # the text after the last '|'
    out_name = name.replace("|", "_")

    out_fasta.write(">" + out_name + "\n")
    out_fasta.write("".join(seq_lines))

    # country_exposure ('?') and date_submitted ('2030-03-22') are fixed
    # placeholder values; an id missing from the lineage table gets '?'.
    row = [
        out_name,
        "ncov",
        date,
        "North America",
        "?",
        "2030-03-22",
        lineages.get(out_name, "?"),
    ]
    out_meta.write("\t".join(row) + "\n")


def main():
    """Rename FASTA records and write a matching metadata table.

    Input: FASTA whose sequence ids end in '|<ISO date>'.
    Output: a FASTA with every '|' in the ids replaced by '_', and a
    tab-separated metadata file with one row per written sequence.
    Records whose id contains 'Wuhan' are left out of both outputs.
    """
    # parse user arguments
    parser = argparse.ArgumentParser(
        # The module has no docstring of its own, so use this function's.
        description=main.__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-iF', '--inFasta', required=True, type=str,
                        help="full metadata file with ISO date after last '|' in name")
    parser.add_argument('-oM', '--outMeta', required=True, type=str,
                        help="output for metadata file")
    parser.add_argument('-oF', '--outFasta', required=True, type=str,
                        help="output for fasta file")
    parser.add_argument('-p', '--inPangolin', required=False, type=str,
                        help="pangolin output lineage_report.csv file, if argument not supplied adds ? in 'lineage' col")
    args = parser.parse_args()

    # Read the lineage table first so a bad path fails before any output
    # file is created or truncated.
    lineages = {}
    if args.inPangolin is not None:
        lineages = _read_lineages(args.inPangolin)

    columns = [
        "strain",
        "virus",
        "date",
        "region",
        "country_exposure",
        "date_submitted",
        "lineage",
    ]

    with open(args.inFasta, "r") as in_fasta, \
            open(args.outFasta, "w") as out_fasta, \
            open(args.outMeta, "w") as out_meta:
        out_meta.write("\t".join(columns) + "\n")

        name = None  # id of the record being collected; None before the first header
        seq_lines = []
        for line in in_fasta:
            if line.startswith(">"):
                # A new header closes the previous record, if there was one.
                if name is not None:
                    _write_record(name, seq_lines, lineages, out_fasta, out_meta)
                name = line.strip()[1:]
                seq_lines = []
            elif name is not None:
                seq_lines.append(line)
            # Lines before the first header belong to no record and are ignored.

        # Flush the final record through the same path (and the same filter)
        # as every other record; an input with no headers writes nothing.
        if name is not None:
            _write_record(name, seq_lines, lineages, out_fasta, out_meta)
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()