@@ -1,43 +1,38 @@
 #!/usr/bin/python3
+
 from sklearn.model_selection import KFold
 import numpy as np
+import typing
 try:
     import sample
 except ImportError:
-    import os
-    import sys
+    import os, sys
     sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
             '/../feature-extractor')
     import sample
 
 DEFAULT_FEATURES = ["average_iat", "high.avg_burst_size", "high.burst_count"]
 
-def main():
-    # a test of this method using an arbitrarily generated list of 5 vectors with
-    # 3 features each
-    # nearestNeighbors([[1, 1, 0], [1, 0, 0], [0, 0, 0], [0, 5, 5]], [[1, 1, 4]])
-    args = parse_args()
+def main(options: list):
+    args = parse_args(options)
     try:
         import cPickle as pickle
     except:
         import pickle
     samples = pickle.load(args.features_file)
-    from random import shuffle
-    shuffle(samples)
+    num_users=len(np.unique([s.user for s in samples]))
+    assert(num_users >= args.min_users)
     features = args.feature if args.feature else DEFAULT_FEATURES
     from Vector import FeatureVector
-    data, labels = zip(*[(FeatureVector(p, features).get(), p.user)
-        for p in samples])
-    res = kNearestNeighbors(np.array(data), np.array(labels),
-            n=args.folds, verbose=args.verbose, k=args.k_neighbors,
-            weights=args.weight, guesses=args.top)
-    print("Overall Accuracy: %f" % np.average(res))
+    data, labels = map(np.array,
+            zip(*[(FeatureVector(p, features).get(), p.user)
+                for p in samples]))
+    avg, p = classify(data, labels, num_users, args)
+    print("Overall Accuracy: %f" % avg)
     if args.p_value:
-        _, p = t_test(res, labels)
-        print("P-Value: %f" % (p / 2))
-
+        print("P-Value: %f" % p)
 
-def parse_args():
+def parse_args(args: list):
     import argparse
     parser = argparse.ArgumentParser(
             description='Run a data set through a kNearestNeighbors classifier.')
@@ -59,8 +54,18 @@ def parse_args():
     parser.add_argument('-t', '--top', type=int, default=1,
             help='Number of guesses to be considered \"correct\" \
                     (default: 1)')
-    return parser.parse_args()
-
+    parser.add_argument('-m', '--min-users', type=int, default=10,
+            help='Minimum number of unique users to consider a sample\
+                    file valid. (default: 10)')
+    return parser.parse_args(args)
+
+def classify(data, labels, num_users: int, args):
+    s = np.arange(data.shape[0])
+    np.random.shuffle(s)
+    res = kNearestNeighbors(data[s], labels[s],
+            n=args.folds, verbose=args.verbose, k=args.k_neighbors,
+            weights=args.weight, guesses=args.top)
+    return (np.average(res), t_test(res, num_users)[1] / 2)
 
 def kNearestNeighbors(data: list, labels: list,
         n=5, verbose=0, k=5, weights="uniform", guesses=1):
@@ -140,11 +145,12 @@ def find_in_predictions(probabilities: list, tests: int, labels: list):
             for probs, test in zip(probabilities, tests)]
 
 
-def t_test(accuracy: list, labels: list):
+def t_test(accuracy: list, num_users: int):
     from scipy import stats
-    random_avg = 1.0/len(np.unique(labels))
+    random_avg = 1.0/num_users
     return stats.ttest_1samp(accuracy, random_avg)
 
 
 if __name__ == '__main__':
-    main()
+    import sys
+    main(sys.argv[1:])