|
|
@@ -0,0 +1,65 @@
|
|
|
+#!/home/tflucke/bin/bin/python3
|
|
|
+import os, sys, typing, pickle, numpy as np
|
|
|
+sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
|
|
|
+ '/../classifiers/')
|
|
|
+from Vector import FeatureVector
|
|
|
+
|
|
|
def main(options: list):
    """Re-run the best previously recorded configurations and print results.

    Parses ``options``, selects the classifier module, loads the result
    records from every input file and, for each of the ``top_n`` largest
    records, re-runs the classifier ``reruns`` times on the samples pickled
    in that record's file. The median or mean of the runs is printed.

    Args:
        options: Command-line arguments, excluding the program name.

    Raises:
        ValueError: If the parsed classifier name is not supported.
    """
    import heapq
    import itertools

    args = parse_args(options)
    if args.classifier == "nearest-neighbor":
        import nearestneighbors as classifier
    elif args.classifier == "random-forest":
        import randomforest as classifier
    else:
        # parse_args restricts the choices, so this only guards against the
        # two lists drifting apart.
        raise ValueError("Unsupported classifier: %r" % (args.classifier,))

    # Statistic applied along axis 0, i.e. element-wise across the reruns.
    combine = np.median if args.final_statistic == "median" else np.average

    def rerun(values):
        """Classify ``values`` ``args.reruns`` times and combine the results."""
        return combine([classifier.classify(*values)
                        for _ in range(args.reruns)], 0)

    # nlargest does not need sorted input, so the per-file record lists are
    # simply concatenated.
    records = itertools.chain.from_iterable(
        load_file(f, args.compression) for f in args.in_files)

    for (_, _, filename, features) in heapq.nlargest(args.top_n, records):
        # Kept separate from the ``options`` parameter of this function.
        classifier_options = classifier.parse_args([filename])
        # NOTE(security): unpickling can execute arbitrary code; only use
        # sample files from a trusted source.
        with open(filename, "rb") as sample_file:
            samples = pickle.load(sample_file)
        num_users = len(np.unique([s.user for s in samples]))
        data, labels = map(np.array,
                           zip(*[(FeatureVector(p, features).get(), p.user)
                                 for p in samples]))
        # The format string below expects exactly two values from the runs.
        runs = rerun((data, labels, num_users, classifier_options))
        print("Accuracy: %0.04f; P-Value: %0.05f; File: %80s; Options: %s" %
              (*runs, filename, features))
|
|
|
+
|
|
|
def load_file(filename: str, compression=None):
    """Load result records from a (possibly compressed) pickle file.

    Every stored record is reduced to its first three fields followed by the
    ``feature`` attribute of its fourth field.

    Args:
        filename: Path handed to ``compress_pickle.load``.
        compression: Compression name forwarded to ``compress_pickle.load``.

    Returns:
        A list with one flattened record per stored record.
    """
    from compress_pickle import load as load_compressed

    flattened = []
    for record in load_compressed(filename, compression=compression):
        head = record[:3]
        flattened.append(head + (record[3].feature,))
    return flattened
|
|
|
+
|
|
|
def parse_args(args: list):
    """Parse the command-line arguments of the rerun script.

    Args:
        args: Argument strings, excluding the program name.

    Returns:
        An ``argparse.Namespace`` with the attributes ``classifier``,
        ``in_files``, ``top_n``, ``reruns``, ``final_statistic``, ``verbose``
        and ``compression``. ``compression`` is ``None`` when the "None"
        choice (the default) is selected.

    Raises:
        SystemExit: Raised by argparse on invalid arguments or ``-h``.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Rerun previous configurations.')
    parser.add_argument('classifier', choices=["nearest-neighbor", "random-forest"],
                        help='Classifier to use.')
    parser.add_argument('in_files', nargs='+', type=str,
                        help='Input file name(s).')
    parser.add_argument('-n', '--top-n', type=int, default=20,
                        help='Re-run top N configurations (default: 20)')
    parser.add_argument('-r', '--reruns', type=int, default=50,
                        help='Number of times to rerun a sample set. '
                             '(default: 50)')
    parser.add_argument('-f', '--final-statistic', choices=["mean", "median"],
                        default="median",
                        help='Final statistic to show. (default: median)')
    parser.add_argument('-v', '--verbose', action="count", default=0,
                        help='Show more information')
    parser.add_argument('-c', '--compression', default="None",
                        choices=["bz2", "gzip", "lzma", "zipfile", "None"],
                        help='Compression algorithm to use. (default: None)')
    res = parser.parse_args(args)
    # The command line can only carry the string "None"; callers expect the
    # real None object for "no compression".
    if res.compression == "None":
        res.compression = None
    return res
|
|
|
+
|
|
|
# Script entry point: pass the command-line arguments (without the program
# name) to main(). ``sys`` is already imported at the top of the file, so no
# second import is needed here.
if __name__ == '__main__':
    main(sys.argv[1:])
|