"""Select a fraction of rows from a file."""
def main(cmdline=None):
    """Copy a random subset of the input file's lines to the output.

    The first ``--header`` lines are always copied; every later line is
    copied with probability ``--include``.

    Args:
        cmdline: Argument list to parse. ``None`` makes argparse read the
            process arguments instead.
    """
    # Function-scope import: the file's import block is not visible from
    # this section, so the new dependency is brought in locally.
    import contextlib

    parser = make_parser()
    args = parser.parse_args(cmdline)

    # NOTE(review): the guard around seeding was not visible in the listing;
    # seeding only on request keeps unseeded runs non-deterministic.
    if args.seed is not None:
        random.seed(args.seed)

    if not args.filename:
        parser.error("Please specify input filename")

    # ExitStack closes whichever files were actually opened, even if
    # writing fails part-way through.
    with contextlib.ExitStack() as stack:
        instream = stack.enter_context(open(args.filename[0], 'rt'))
        if args.output:
            outstream = stack.enter_context(open(args.output, 'wt'))
        else:
            # stdout is deliberately not registered: it must stay open.
            outstream = sys.stdout

        for line in subset(instream, args.header, args.include):
            outstream.write(line)
def make_parser():
    """Build the command-line parser for the row-subsetting script.

    Returns:
        argparse.ArgumentParser: parser defining the positional ``filename``
        and the ``--output``, ``--include``, ``--header`` and ``--seed``
        options.
    """
    parser = argparse.ArgumentParser()
    # nargs=1 stores the filename as a one-element list.
    parser.add_argument("filename", nargs=1,
                        help="filename to read from")
    parser.add_argument("-o", "--output",
                        help="output filename")
    parser.add_argument("-i", "--include", default=0.10, type=float,
                        help="probability to include a line [0..1]")
    parser.add_argument("--header", default=1, type=int,
                        help="number of header lines to include")
    # NOTE(review): the remaining keyword arguments of --seed were not
    # visible in the listing; this help text is a placeholder to confirm.
    parser.add_argument("-s", "--seed",
                        help="seed for the random number generator")
    return parser
def subset(instream, header_lines, include_fraction):
    """Subset lines from a file

    Always include the first specified number of 'header_lines'
    then after that include lines if they meet the random threshold

    Args:
        instream: Iterable of lines, such as an open text file.
        header_lines: Number of leading lines to pass through
            unconditionally; values below 1 mean no header.
        include_fraction: Probability of keeping each remaining line. A line
            is kept when ``random.random() < include_fraction``.

    Yields:
        The header lines, followed by the randomly selected lines, in their
        original order.
    """
    from itertools import islice

    lines = iter(instream)

    # islice stops quietly when the input has fewer lines than the header
    # count; a bare next() inside a generator would instead surface as
    # RuntimeError (PEP 479). Negative counts are clamped because islice
    # rejects them.
    yield from islice(lines, max(header_lines, 0))

    for line in lines:
        if random.random() < include_fraction:
            yield line
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()