6 rokov pred · ee602bf65f
--- a/src/distributer/collect_compressed.py
+++ b/src/distributer/collect_compressed.py
@@ -6,14 +6,14 @@ import numpy as np
 
				 EXTRACTION_PARAMS = {
			
 
				     "-a": ("Small Paste Size (Blocks)", int, np.arange(1,7)),
			
 
				     "-p": ("Large Paste Size (Blocks)", lambda x: int(x.split(".", 1)[0]),
			
 
				-           np.arange(2,8)),
			
 
				+           np.arange(2,11)),
			
 
				     "-l": ("Low Activity Threshold (k/s)", lambda x: float(x) - 0.001,
			
 
				-           np.arange(1,7)),
			
 
				-    "-i": ("High Activity Threshold (k/s)", lambda x: float(x) + 0.001,
			
 
				-           [1.5, 2, 2.5, 3, 3.5, 4, 4.5]),
			
 
				-    "-b": ("Lookback (s)", int, np.arange(1,7)),
			
 
				-    "-s": ("Sample Size (Count)", int, [100, 150, 200, 300, 400, 500, 600]),
			
 
				-    "-m": ("Minimum Number of Samples (Samples/Tag)", int, [5, 10, 15, 20, 25])
			
 
				+           np.arange(0.5, 7, 0.5)),
			
 
				+    "-i": ("High Activity Threshold (k/s)", lambda x: float(x) + 0.01,
			
 
				+           [1.5, 2, 2.5, 2.75, 3, 3.5, 4, 4.5, 5, 6, 7]),
			
 
				+    "-b": ("Lookback (s)", float, np.arange(1, 7.25, 0.25)),
			
 
				+    "-s": ("Sample Size (Count)", int, [100, 150, 175, 200, 300, 400, 500, 600, 700]),
			
 
				+    "-m": ("Minimum Number of Samples (Samples/Tag)", int, [5, 10, 15, 17, 20, 25])
			
 
				 }
			
 
				 
			
 
				 def main(options: list):
			
@@ -151,7 +151,7 @@ def parse_args(args: list):
 
				     parser.add_argument('-v', '--verbose', action="count", default=0,
			
 
				                         help='Show more information')
			
 
				     parser.add_argument('-c', '--compression', default="bz2",
			
 
				-                        choices=["bz2", "gzip", "lzma", "zipfile", None],
			
 
				+                        choices=["bz2", "gzip", "lzma", "zipfile", "None"],
			
 
				                         help='Compression algorithm to use. (default: bzip2)')
			
 
				     try:
			
 
				         import seaborn
			
@@ -173,6 +173,8 @@ def parse_args(args: list):
 
				     elif unknown:
			
 
				         parser.print_help()
			
 
				         exit(2)
			
 
				+    if res.compression == "None":
			
 
				+        vars(res)["compression"] = None
			
 
				     return res
			
 
				 
			
 
				 if __name__ == '__main__':
			
--- a/src/distributer/distribute_compressor.py
+++ b/src/distributer/distribute_compressor.py
@@ -12,7 +12,7 @@ def main(options: list):
 
				     import heapq, signal
			
 
				     def dump(sig, frame):
			
 
				         print("Dumping to file %s." % args.out_file, file=sys.stderr)
			
 
				-        pickle.dump(heap, args.out_file)
			
 
				+        pickle.dump(heap, open(args.out_file, "wb"))
			
 
				     signal.signal(signal.SIGUSR1, dump)
			
 
				     def dump_exit(sig, frame, i):
			
 
				         dump(sig, frame)
			
@@ -20,7 +20,7 @@ def main(options: list):
 
				     signal.signal(signal.SIGTERM, lambda sig, frame: dumpexit(sig, frame, 3))
			
 
				     if args.classifier == "nearest-neighbor":
			
 
				         import nearestneighbors as classifier
			
 
				-    if args.classifier == "random-forest":
			
 
				+    elif args.classifier == "random-forest":
			
 
				         import randomforest as classifier
			
 
				     if args.final_statistic == "median":
			
 
				         avg_fn = lambda values: np.median([classifier.classify(*values)
			
@@ -47,8 +47,6 @@ def process_options(line: str, classifier, avg_fn):
 
				                        zip(*[(FeatureVector(p, features).get(), p.user)
			
 
				                              for p in samples]))
			
 
				     runs = avg_fn((data, labels, num_users, options))
			
 
				-    #print(runs, file=sys.stderr)
			
 
				-    #write_to_file(args.out_file, args.compression, )
			
 
				     filename = options.features_file.name
			
 
				     del options.features_file
			
 
				     return (*runs, filename, options)
			
--- a/src/distributer/rerunner.py
+++ b/src/distributer/rerunner.py
@@ -0,0 +1,65 @@
 
				+#!/home/tflucke/bin/bin/python3
			
 
				+import os, sys, typing, pickle, numpy as np
			
 
				+sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
			
 
				+                '/../classifiers/')
			
 
				+from Vector import FeatureVector
			
 
				+
			
 
				+def main(options: list):
				+    """Re-run the top configurations stored in previously saved result files.
				+
				+    Parses ``options``, loads the records of every input file and, for the
				+    ``--top-n`` largest records, re-classifies the record's sample file
				+    ``--reruns`` times, printing the median (or mean) of those runs.
				+
				+    :param options: command-line arguments, without the program name.
				+    """
				+    import heapq
				+    import itertools
				+
				+    args = parse_args(options)
				+    # Only the classifier module that was selected gets imported.
				+    if args.classifier == "nearest-neighbor":
				+        import nearestneighbors as classifier
				+    elif args.classifier == "random-forest":
				+        import randomforest as classifier
				+    aggregate = np.median if args.final_statistic == "median" else np.average
				+
				+    def avg_fn(values):
				+        # Aggregate along axis 0, i.e. across the repeated runs.
				+        return aggregate([classifier.classify(*values)
				+                          for _ in range(args.reruns)], 0)
				+
				+    # heapq.merge only produces sorted output for already-sorted inputs,
				+    # and nlargest does its own ordering, so the records are just chained.
				+    records = itertools.chain.from_iterable(
				+        load_file(f, args.compression) for f in args.in_files)
				+    for (_, _, filename, features) in heapq.nlargest(args.top_n, records):
				+        # Use a separate name so the ``options`` parameter is not rebound.
				+        classifier_options = classifier.parse_args([filename])
				+        # NOTE(review): pickle.load can execute arbitrary code; only use
				+        # trusted sample files.
				+        with open(filename, "rb") as sample_file:
				+            samples = pickle.load(sample_file)
				+        num_users = len(np.unique([s.user for s in samples]))
				+        data, labels = map(np.array,
				+                           zip(*[(FeatureVector(p, features).get(), p.user)
				+                                 for p in samples]))
				+        runs = avg_fn((data, labels, num_users, classifier_options))
				+        print("Accuracy: %0.04f; P-Value: %0.05f; File: %80s; Options: %s" %
				+              (*runs, filename, features))
			
 
				+
			
 
				+def load_file(filename: str, compression=None):
				+    """Load saved records and keep only the fields needed for a re-run.
				+
				+    :param filename: path handed to ``compress_pickle.load``.
				+    :param compression: compression name passed through to
				+        ``compress_pickle.load`` (``None`` by default).
				+    :return: list with, per record, its first three items followed by
				+        the ``feature`` attribute of its fourth item.
				+    """
				+    import compress_pickle
				+    stored = compress_pickle.load(filename, compression=compression)
				+    trimmed = []
				+    for record in stored:
				+        head = record[0:3]
				+        # The fourth item is replaced by its ``feature`` attribute.
				+        trimmed.append(head + (record[3].feature,))
				+    return trimmed
			
 
				+
			
 
				+def parse_args(args: list):
				+    """Parse the command-line arguments of the re-runner.
				+
				+    :param args: argument strings, without the program name.
				+    :return: the parsed namespace; ``compression`` is ``None`` when the
				+        string "None" was selected (which is also the default).
				+    """
				+    import argparse
				+    parser = argparse.ArgumentParser(description='Rerun previous configurations.')
				+    parser.add_argument('classifier', choices=["nearest-neighbor", "random-forest"],
				+                        help='Classifier to use.')
				+    # One or more result files; they are loaded by main(), not written.
				+    parser.add_argument('in_files', nargs='+', type=str,
				+                        help='Input file name(s).')
				+    parser.add_argument('-n', '--top-n', type=int, default=20,
				+                        help='Re-run top N configurations (default: 20)')
				+    parser.add_argument('-r', '--reruns', type=int, default=50,
				+                        help='Number of times to rerun a sample set. \
				+                        (default: 50)')
				+    parser.add_argument('-f', '--final-statistic', choices=["mean", "median"],
				+                        default="median", help='Final statistic to show. \
				+                        (default: median)')
				+    parser.add_argument('-v', '--verbose', action="count", default=0,
				+                        help='Show more information')
				+    parser.add_argument('-c', '--compression', default="None",
				+                        choices=["bz2", "gzip", "lzma", "zipfile", "None"],
				+                        help='Compression algorithm to use. (default: None)')
				+    res = parser.parse_args(args)
				+    # The choice list holds the string "None"; turn it into the None object.
				+    if res.compression == "None":
				+        res.compression = None
				+    return res
			
 
				+
			
 
				+if __name__ == '__main__':
				+    # Script entry point: pass everything after the program name to main().
				+    # sys is already imported at the top of this file.
				+    main(sys.argv[1:])