ソースを参照

Merge branch 'master' of git.tflucke.name:tflucke/SSH-Master-Thesis

Thomas Flucke 6 年 前
コミット
0e2a9b753f

+ 1 - 0
.gitignore

@@ -13,6 +13,7 @@ data/*/
 src/flow-seperator/flow-seperator
 src/pcap-matcher/pcap-matcher
 src/packet-matcher/packet-matcher
+src/distributer/fixed-read
 src/common/*.a
 data/keylog-matchings.txt
 **/__pycache__/

+ 30 - 24
src/classifiers/nearestneighbors.py

@@ -1,43 +1,38 @@
 #!/usr/bin/python3
+
 from sklearn.model_selection import KFold
 import numpy as np
+import typing
 try:
     import sample
 except ImportError:
-    import os
-    import sys
+    import os, sys
     sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + \
                     '/../feature-extractor')
     import sample
 
 DEFAULT_FEATURES = ["average_iat", "high.avg_burst_size", "high.burst_count"]
 
-def main():
-    # a test of this method using an arbitrarily generated list of 5 vectors with
-    # 3 features each
-    # nearestNeighbors([[1, 1, 0], [1, 0, 0], [0, 0, 0], [0, 5, 5]], [[1, 1, 4]])
-    args = parse_args()
+def main(options: list):  # Script entry point; options is the argument list handed to parse_args.
+    args = parse_args(options)
     try:
         import cPickle as pickle
     except:
         import pickle
     samples = pickle.load(args.features_file)  # NOTE(review): unpickling can execute arbitrary code; only load trusted sample files.
-    from random import shuffle
-    shuffle(samples)
+    num_users=len(np.unique([s.user for s in samples]))  # Number of distinct user labels among the samples.
+    assert(num_users >= args.min_users)  # NOTE(review): assert is skipped under python -O, so this check can silently disappear.
     features = args.feature if args.feature else DEFAULT_FEATURES  # Fall back to DEFAULT_FEATURES when no feature option was given.
     from Vector import FeatureVector
-    data, labels = zip(*[(FeatureVector(p, features).get(), p.user)
-                         for p in samples])
-    res = kNearestNeighbors(np.array(data), np.array(labels),
-                            n=args.folds, verbose=args.verbose, k=args.k_neighbors,
-                            weights=args.weight, guesses=args.top)
-    print("Overall Accuracy: %f" % np.average(res))
+    data, labels = map(np.array,  # Two parallel numpy arrays: one feature vector and one user label per sample.
+                       zip(*[(FeatureVector(p, features).get(), p.user)
+                             for p in samples]))
+    avg, p = classify(data, labels, num_users, args)  # classify returns (mean accuracy, halved t-test p-value).
+    print("Overall Accuracy: %f" % avg)
     if args.p_value:
-        _, p = t_test(res, labels)
-        print("P-Value: %f" % (p / 2))
-
+        print("P-Value: %f" % p)
 
-def parse_args():
+def parse_args(args: list):
     import argparse
     parser = argparse.ArgumentParser(
         description='Run a data set through a kNearestNeighbors classifier.')
@@ -59,8 +54,18 @@ def parse_args():
     parser.add_argument('-t', '--top', type=int, default=1,
                         help='Number of guesses to be considered \"correct\" \
                         (default: 1)')
-    return parser.parse_args()
-
+    parser.add_argument('-m', '--min-users', type=int, default=10,
+                        help='Minimum number of unique users to consider a sample\
+                        file valid. (default: 10)')
+    return parser.parse_args(args)
+
+def classify(data, labels, num_users: int, args):  # Shuffle the samples, run kNearestNeighbors, and return (mean accuracy, halved p-value).
+    s = np.arange(data.shape[0])  # One index per row of data.
+    np.random.shuffle(s)  # In-place random permutation of the indices.
+    res = kNearestNeighbors(data[s], labels[s],  # Indexing both arrays with the same permutation keeps vectors and labels paired.
+                            n=args.folds, verbose=args.verbose, k=args.k_neighbors,
+                            weights=args.weight, guesses=args.top)
+    return (np.average(res), t_test(res, num_users)[1] / 2)  # NOTE(review): halving the p-value is a one-sided value only if the mean lies on the expected side of the baseline; the statistic's sign is not checked.
 
 def kNearestNeighbors(data: list, labels: list,
                       n=5, verbose=0, k=5, weights="uniform", guesses=1):
@@ -140,11 +145,12 @@ def find_in_predictions(probabilities: list, tests: int, labels: list):
             for probs, test in zip(probabilities, tests)]
 
 
-def t_test(accuracy: list, labels: list):
+def t_test(accuracy: list, num_users: int):  # One-sample t-test of the accuracy values against the 1/num_users baseline.
     from scipy import stats
-    random_avg = 1.0/len(np.unique(labels))
+    random_avg = 1.0/num_users  # Presumably the accuracy expected from uniform random guessing; raises ZeroDivisionError when num_users is 0.
     return stats.ttest_1samp(accuracy, random_avg)  # Returns scipy's result, whose second element is the p-value.
 
 
 if __name__ == '__main__':
-    main()
+    import sys
+    main(sys.argv[1:])  # Pass the command-line arguments without the program name.

+ 19 - 0
src/distributer/Makefile

@@ -0,0 +1,19 @@
+TARGET=fixed-read
+CC=gcc
+LIBS=../common
+CCFLAGS=-Wall -O2 -I$(LIBS)
+LDFLAGS=-L$(LIBS)
+# NOTE(review): LDFLAGS adds a library search path, but no -l flag appears on the link line below.
+ODIR=obj
+# NOTE(review): OBJECTS and HEADERS are defined but never referenced by a rule; ODIR is only used in OBJECTS and in clean.
+SOURCES = $(wildcard *.c)
+OBJECTS = $(patsubst %.c, $(ODIR)/%.o, $(SOURCES))
+HEADERS = $(wildcard *.h)
+# Default goal: build the target binary.
+default: $(TARGET)
+# Compile and link every .c file in one step; PROG_NAME is defined as the quoted target name.
+$(TARGET): $(SOURCES)
+	$(CC) $(CCFLAGS) $^ $(LDFLAGS) -o $@ -D"PROG_NAME=\"$@\""
+# NOTE(review): default and clean are not declared .PHONY.
+clean:
+	rm -f $(TARGET) $(ODIR)/*.o *~

+ 23 - 15
src/distributer/distribute.sh

@@ -2,10 +2,11 @@
 
 readonly DEFAULT_OUT_FMT="%s.out"
 readonly CMD_FEED="$(mktemp -u /tmp/distributer-XXX.fifo)" # -u only generates a name; nothing is created on this line
-readonly LOCK="$CMD_FEED.lock"
-readonly LOCK_TIMEOUT="1"
 readonly PROC_BUFFER=10
 readonly MAX_PROCS=$(expr $(ulimit -u) / 3 - $PROC_BUFFER) # NOTE(review): expr fails if ulimit -u prints a non-numeric value such as unlimited
+readonly MAX_CMD_SIZE=350 # byte count passed to the READER helper for each command read
+readonly READER="$(dirname $(realpath $0))/fixed-read" # helper expected next to this script; NOTE(review): unquoted $0 and realpath output break on paths with spaces
+readonly TIMEOUT=600 # 10 minutes; passed to read -t while waiting for a command to finish
 
 readonly CONF_LIST="$1"
 readonly SERVER_LIST="$2"
@@ -40,18 +41,26 @@ clean_server() {
 
 run_server() { # Feed commands taken from stdin to a remote sh on server $1 over ssh, one at a time.
     server="$1"
-    loop="/tmp/$(basename $CMD_FEED .fifo)-$server.fifo"
+    loop="$(mktemp -u /tmp/$(basename $CMD_FEED .fifo)-$server-XXX.fifo)" # Per-server FIFO name; the remote shell's stdout is redirected into it below.
+    out_file=$(printf "$OUT_FMT" "$(basename "$loop" .fifo)") # One output file name per server, built from the FIFO's basename via OUT_FMT.
     mkfifo "$loop"
     trap "clean_server $loop" 2 15 # Clean up the FIFO on signals 2 and 15.
-    while lockfile -$LOCK_TIMEOUT "$LOCK"; read cmd; do
-        rm -f "$LOCK"
-        cmd_sanitized="$(echo "$cmd" | sed "$S_SPACE;$R_DASH;$S_QUOTE")"
-        out_file="$(printf "$OUT_FMT" "$cmd_sanitized")"
-        printf "$server: $cmd\n" >&2
-        printf "$cmd > $out_file\necho\n"
-        read line < "$loop" > /dev/null # Block until command completes
-    done | ssh -oBatchMode=yes -oStrictHostKeyChecking=no "$server" "sh" > "$loop"
-    rm -f "$LOCK"
+    { # Everything printed to stdout inside this group becomes the script the remote sh executes.
+        printf "echo > %s\necho\n" "$out_file" # First remote commands: reset the output file, then echo one line so the first read below returns.
+        # Block (up to TIMEOUT seconds) until the remote echo signals that the previous command completed, then take the next record. NOTE(review): read -t is not POSIX sh; the shebang is not visible here.
+        while read -t $TIMEOUT line < "$loop" > /dev/null && \
+            cmd=$("$READER" $MAX_CMD_SIZE) # The loop ends when READER exits non-zero.
+        do
+            cmd=$(printf "%s" "$cmd" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//') # Strip leading and trailing whitespace from the record.
+            #cmd_sanitized="$(echo $cmd | sed "$S_SPACE;$R_DASH;$S_QUOTE")"
+            #out_file="$(printf "$OUT_FMT" $cmd_sanitized)"
+            printf "$server: %s\n" "$cmd" >&2 # Log which server receives which command.
+            #printf "%s > %s\necho\n" "$cmd" "$out_file"
+            printf "printf \"%s: \" >> %s\n" "$cmd"  "$out_file" # NOTE(review): the command text is pasted unescaped into a double-quoted remote printf format; quotes, percent signs, dollar signs or backslashes in it will be interpreted remotely.
+            printf "%s >> %s\n" "$cmd"  "$out_file" # Run the command remotely, appending its stdout to the output file.
+            printf "echo\n" # The remote echo writes one line into the FIFO, releasing the next read.
+        done
+    } | ssh -oBatchMode=yes -oStrictHostKeyChecking=no "$server" "sh" > "$loop" # Remote stdout comes back through the FIFO.
     clean_server "$loop"
     echo "Server '$server' finished!" >&2
 }
@@ -61,15 +70,14 @@ clean_up() {
         pkill -P $pid
     done
     rm "$CMD_FEED"
-    [ -e "$LOCK" ] && rm -f "$LOCK"
     exit 2
 }
 
 main() {
     mkfifo "$CMD_FEED"
     trap clean_up 2 15
-    cat "$CONF_LIST" | sed '/^[[:space:]]*$/d' > "$CMD_FEED" &
-    pids=""
+    cat "$CONF_LIST" | sed '/^[[:space:]]*$/d' | \
+        xargs -d'\n' printf "%-$MAX_CMD_SIZE.${MAX_CMD_SIZE}s" > "$CMD_FEED" &
     for server in $(head -n$MAX_PROCS "$SERVER_LIST"); do
         run_server "$server" < "$CMD_FEED" > /dev/null &
     done

+ 32 - 0
src/distributer/fixed-read.c

@@ -0,0 +1,32 @@
+#include <unistd.h>
+#include <stdio.h>
+
+#ifndef PROG_NAME
+#define PROG_NAME "a.out"
+#endif
+
+void help() { /* Print the usage text to stderr. */
+  fprintf(stderr, "Usage: %s byte_count\n", PROG_NAME);
+  fprintf(stderr, "    Reads a byte_count bytes and outputs them.\n");
+}
+
+int fixed_read(size_t n) { /* Read up to n bytes from stdin with a single read() and copy them to stdout; returns 0 only when exactly n bytes were read. */
+  char buff[n]; /* NOTE(review): variable-length stack buffer sized by the caller; n == 0 or a very large n is unsafe. */
+  int err = read(STDIN_FILENO, buff, n); /* NOTE(review): read() returns ssize_t and may deliver fewer than n bytes; there is no retry loop. */
+  if (-1 == err) {
+    perror(PROG_NAME);
+  }
+  else if (-1 == write(STDOUT_FILENO, buff, err)) { /* Forward exactly the bytes that were read; a short write is not detected. */
+    perror(PROG_NAME);
+  }
+  return n != err; /* NOTE(review): a failed write after a full read still returns 0, because only the read count is compared. */
+}
+
+int main(int argn, char** argv) { /* Parse byte_count from argv[1] and return fixed_read's result as the exit status. */
+  size_t n;
+  if (1 == argn || 1 != sscanf(argv[1], " %zu", &n)) { /* Missing or non-numeric argument: print usage and exit 1. Extra arguments and trailing characters after the number are ignored. */
+    help();
+    return 1;
+  }
+  return fixed_read(n);
+}