Prechádzať zdrojové kódy

Updated tuning boundaries and fixed errors.

Tom Flucke 6 rokov pred
rodič
commit
ee602bf65f

+ 10 - 8
src/distributer/collect_compressed.py

@@ -6,14 +6,14 @@ import numpy as np
 EXTRACTION_PARAMS = {
     "-a": ("Small Paste Size (Blocks)", int, np.arange(1,7)),
     "-p": ("Large Paste Size (Blocks)", lambda x: int(x.split(".", 1)[0]),
-           np.arange(2,8)),
+           np.arange(2,11)),
     "-l": ("Low Activity Threshold (k/s)", lambda x: float(x) - 0.001,
-           np.arange(1,7)),
-    "-i": ("High Activity Threshold (k/s)", lambda x: float(x) + 0.001,
-           [1.5, 2, 2.5, 3, 3.5, 4, 4.5]),
-    "-b": ("Lookback (s)", int, np.arange(1,7)),
-    "-s": ("Sample Size (Count)", int, [100, 150, 200, 300, 400, 500, 600]),
-    "-m": ("Minimum Number of Samples (Samples/Tag)", int, [5, 10, 15, 20, 25])
+           np.arange(0.5, 7, 0.5)),
+    "-i": ("High Activity Threshold (k/s)", lambda x: float(x) + 0.01,
+           [1.5, 2, 2.5, 2.75, 3, 3.5, 4, 4.5, 5, 6, 7]),
+    "-b": ("Lookback (s)", float, np.arange(1, 7.25, 0.25)),
+    "-s": ("Sample Size (Count)", int, [100, 150, 175, 200, 300, 400, 500, 600, 700]),
+    "-m": ("Minimum Number of Samples (Samples/Tag)", int, [5, 10, 15, 17, 20, 25])
 }
 
 def main(options: list):
@@ -151,7 +151,7 @@ def parse_args(args: list):
     parser.add_argument('-v', '--verbose', action="count", default=0,
                         help='Show more information')
     parser.add_argument('-c', '--compression', default="bz2",
-                        choices=["bz2", "gzip", "lzma", "zipfile", None],
+                        choices=["bz2", "gzip", "lzma", "zipfile", "None"],
                         help='Compression algorithm to use. (default: bzip2)')
     try:
         import seaborn
@@ -173,6 +173,8 @@ def parse_args(args: list):
     elif unknown:
         parser.print_help()
         exit(2)
+    if res.compression == "None":
+        vars(res)["compression"] = None
     return res
 
 if __name__ == '__main__':

+ 2 - 4
src/distributer/distribute_compressor.py

@@ -12,7 +12,7 @@ def main(options: list):
     import heapq, signal
     def dump(sig, frame):
         print("Dumping to file %s." % args.out_file, file=sys.stderr)
-        pickle.dump(heap, args.out_file)
+        pickle.dump(heap, open(args.out_file, "wb"))
     signal.signal(signal.SIGUSR1, dump)
     def dump_exit(sig, frame, i):
         dump(sig, frame)
@@ -20,7 +20,7 @@ def main(options: list):
     signal.signal(signal.SIGTERM, lambda sig, frame: dumpexit(sig, frame, 3))
     if args.classifier == "nearest-neighbor":
         import nearestneighbors as classifier
-    if args.classifier == "random-forest":
+    elif args.classifier == "random-forest":
         import randomforest as classifier
     if args.final_statistic == "median":
         avg_fn = lambda values: np.median([classifier.classify(*values)
@@ -47,8 +47,6 @@ def process_options(line: str, classifier, avg_fn):
                        zip(*[(FeatureVector(p, features).get(), p.user)
                              for p in samples]))
     runs = avg_fn((data, labels, num_users, options))
-    #print(runs, file=sys.stderr)
-    #write_to_file(args.out_file, args.compression, )
     filename = options.features_file.name
     del options.features_file
     return (*runs, filename, options)

+ 65 - 0
src/distributer/rerunner.py

@@ -0,0 +1,65 @@
+#!/home/tflucke/bin/bin/python3
+import os, sys, typing, pickle, numpy as np
+sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
+                '/../classifiers/')
+from Vector import FeatureVector
+
+def main(options: list):
+    args = parse_args(options)
+    if args.classifier == "nearest-neighbor":
+        import nearestneighbors as classifier
+    elif args.classifier == "random-forest":
+        import randomforest as classifier
+    if args.final_statistic == "median":
+        avg_fn = lambda values: np.median([classifier.classify(*values)
+                                    for i in range(0, args.reruns)], 0)
+    else:
+        avg_fn = lambda values: np.average([classifier.classify(*values)
+                                    for i in range(0, args.reruns)], 0)
+    heaps = [load_file(f, args.compression) for f in args.in_files]
+    import heapq
+    heap = list(heapq.merge(*heaps))
+    for (_, _, filename, features) in heapq.nlargest(args.top_n, heap):
+        options = classifier.parse_args([filename])
+        samples = pickle.load(open(filename, "rb"))
+        num_users = len(np.unique([s.user for s in samples]))
+        data, labels = map(np.array,
+                           zip(*[(FeatureVector(p, features).get(), p.user)
+                                 for p in samples]))
+        runs = avg_fn((data, labels, num_users, options))
+        print("Accuracy: %0.04f; P-Value: %0.05f; File: %80s; Options: %s" %
+              (*runs, filename, features))
+
+def load_file(filename: str, compression=None):
+    import compress_pickle
+    return [r[0:3] + (r[3].feature,)
+            for r in compress_pickle.load(filename, compression=compression)]
+
+def parse_args(args: list):
+    import argparse
+    parser = argparse.ArgumentParser(description='Rerun previous configurations.')
+    parser.add_argument('classifier', choices=["nearest-neighbor", "random-forest"],
+                        help='Classifier to use.')
+    parser.add_argument('in_files', nargs='+', type=str, #argparse.FileType('wb')
+                        help='Output file name.')
+    parser.add_argument('-n', '--top-n', type=int, default=20,
+                        help='Re-run top N configurations (default: 20)')
+    parser.add_argument('-r', '--reruns', type=int, default=50,
+                        help='Number of times to rerun a sample set. \
+                        (default: 50)')
+    parser.add_argument('-f', '--final-statistic', choices=["mean", "median"],
+                        default="median", help='Final statistic to show. \
+                        (default: median)')
+    parser.add_argument('-v', '--verbose', action="count", default=0,
+                        help='Show more information')
+    parser.add_argument('-c', '--compression', default="None",
+                        choices=["bz2", "gzip", "lzma", "zipfile", "None"],
+                        help='Compression algorithm to use. (default: None)')
+    res = parser.parse_args(args)
+    if res.compression == "None":
+        vars(res)["compression"] = None
+    return res
+
+if __name__ == '__main__':
+    import sys
+    main(sys.argv[1:])