пре 6 година · 74dad6eb63
--- a/src/feature-extractor/common.py
+++ b/src/feature-extractor/common.py
@@ -0,0 +1,13 @@
 
				+from itertools import islice
			
 
				+
			
 
				+# From stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator
			
 
				+def window(seq, n=2):
			
 
				+    """Yield a sliding window of width ``n`` over ``seq``.
			
 
				+
			
 
				+    s -> (s0, s1, ..., s[n-1]), (s1, s2, ..., sn), ...
			
 
				+
			
 
				+    Args:
			
 
				+        seq: Any iterable; it is consumed lazily, one element at a time.
			
 
				+        n: Window width (default 2); must be at least 1.
			
 
				+
			
 
				+    Yields:
			
 
				+        Tuples of exactly ``n`` consecutive elements.  Nothing is yielded
			
 
				+        when ``seq`` holds fewer than ``n`` elements.
			
 
				+
			
 
				+    Raises:
			
 
				+        ValueError: If ``n`` is smaller than 1.  Because this is a
			
 
				+            generator, the error surfaces on the first iteration.
			
 
				+    """
			
 
				+    if n < 1:
			
 
				+        # A zero-width window would yield () and then a stream of
			
 
				+        # 1-tuples, which is never a meaningful result.
			
 
				+        raise ValueError("window width n must be at least 1")
			
 
				+    it = iter(seq)
			
 
				+    # Prime the first window; shorter inputs produce no window at all.
			
 
				+    result = tuple(islice(it, n))
			
 
				+    if len(result) == n:
			
 
				+        yield result
			
 
				+    for elem in it:
			
 
				+        # Drop the oldest element and append the newest one.
			
 
				+        result = result[1:] + (elem,)
			
 
				+        yield result
			
--- a/src/feature-extractor/extractor.py
+++ b/src/feature-extractor/extractor.py
@@ -17,12 +17,13 @@ def parse_args():
 
				     parser.add_argument('-s', '--sample-size', type=int,
			
 
				                         default="200", help='number of packets in a sample \
			
 
				                         (default: 200)')
			
 
				-    parser.add_argument('-l', '--low-act-threshold', type=float, default=1,
			
 
				+    parser.add_argument('-l', '--low-act-threshold', type=float, default=1.0,
			
 
				                         help='P/s below which is considered low activity \
			
 
				                         (default: 1.0 P/s)')
			
 
				-    parser.add_argument('-i', '--high-act-threshold', type=float, default=3,
			
 
				+    # Average typing speed is 3 keys/second; the default sits just below it.
			
 
				+    parser.add_argument('-i', '--high-act-threshold', type=float, default=2.7,
			
 
				                         help='P/s above which is considered high activity \
			
 
				-                        (default: 3.0 P/s)')
			
 
				+                        (default: 2.7 P/s)')
			
 
				     parser.add_argument('-b', '--lookback', type=float, default=3,
			
 
				                         help='Seconds of lookback to determine activity state \
			
 
				                         (default: 3.0s)')
			
--- a/src/feature-extractor/sample.py
+++ b/src/feature-extractor/sample.py
@@ -1,12 +1,14 @@
 
				-import typing
			
 
				 import pyshark
			
 
				 from datetime import datetime
			
 
				+import typing
			
 
				 from typing import List
			
 
				+from common import window
			
 
				 
			
 
				 class Sample:
			
 
				     EPOCH = datetime(1970, 1, 1)
			
 
				     TIME_FMT = '%Y-%m-%d %H:%M:%S.%f'
			
 
				     FILTER = "tcp.flags.push == 1 && tcp.dstport == 22" # len % 8 == 6
			
 
				+    EPSIOLON = 0.0000000000001
			
 
				 
			
 
				     def make_samples(keylog: typing.TextIO,
			
 
				                      pcap: typing.BinaryIO,
			
@@ -40,7 +42,7 @@ class Sample:
 
				         self.__general = {}
			
 
				         self.__extract_tag(keylog)
			
 
				         self.__extract_activity_stats(packets)
			
 
				-        self.__extract_packet_stats(packets)
			
 
				+        self.__extract_time_stats(packets)
			
 
				 
			
 
				     def __extract_tag(self, keylog: typing.TextIO):
			
 
				         import os
			
@@ -53,19 +55,34 @@ class Sample:
 
				         high_activity = []
			
 
				         mid_activity = []
			
 
				         low_activity = []
			
 
				+        last_bin = low_activity
			
 
				+        cur_bin = None
			
 
				+        dead_time = 0.0
			
 
				         q = []
			
 
				+        i = 0
			
 
				         for p in packets:
			
 
				             ptime = Sample.__packet_time(p)
			
 
				+            p.index = i
			
 
				+            i += 1
			
 
				+            if q:
			
 
				+                p.delta = ptime - Sample.__packet_time(q[-1])
			
 
				+            else:
			
 
				+                p.delta = 0.0
			
 
				             q.append(p)
			
 
				+            while Sample.__packet_time(q[0]) + self.lookback < ptime:
			
 
				+                q = q[1:]
			
 
				             rate = float(len(q)) / self.lookback
			
 
				             if rate < self.low_act_threshold:
			
 
				-                low_activity.append(q)
			
 
				+                cur_bin = low_activity
			
 
				             elif rate < self.high_act_threshold:
			
 
				-                mid_activity.append(q)
			
 
				+                cur_bin = mid_activity
			
 
				             else:
			
 
				-                high_activity.append(q)
			
 
				-            while Sample.__packet_time(q[0]) + self.lookback < ptime:
			
 
				-                q = q[1:]
			
 
				+                cur_bin = high_activity
			
 
				+            if last_bin != cur_bin:
			
 
				+                dead_time += max(p.delta - Sample.lookback, 0)
			
 
				+            cur_bin.append(p)
			
 
				+            last_bin = cur_bin
			
 
				+        self["dead_time"] = dead_time
			
 
				         self.__activities = {
			
 
				             "high": Sample.__count_activity_stats(high_activity),
			
 
				             "mid": Sample.__count_activity_stats(mid_activity),
			
@@ -73,13 +90,37 @@ class Sample:
 
				         }
			
 
				 
			
 
				     def __count_activity_stats(arr):
			
 
				+        # Summarise one activity bin.
			
 
				+        #
			
 
				+        # arr is the list of packets collected for the bin; it is called
			
 
				+        # through the class (no self), once per bin.
			
 
				+        #
			
 
				+        # Returns a dict with the packet count plus the two values that
			
 
				+        # Sample.__get_time_spent(arr) computes for the same list.
			
 
				+        (time_spent, iat) = Sample.__get_time_spent(arr)
			
 
				         return {
			
 
				-            "total_packets": len(arr)
			
 
				+            "total_packets": len(arr),
			
 
				+            "time_spent": time_spent,
			
 
				+            "average_iat": iat
			
 
				         }
			
 
				-        
			
 
				-    def __extract_packet_stats(self, pcap):
			
 
				+
			
 
				+    def __is_continuous(p1, p2):
			
 
				+        # True when p2.index is exactly one greater than p1.index, i.e. the
			
 
				+        # two packets carry consecutive ``index`` values.  Both indices are
			
 
				+        # coerced with int() before the comparison.
			
 
				+        return int(p1.index) + 1 == int(p2.index)
			
 
				+    
			
 
				+    def __get_time_spent(arr):
			
 
				+        # Measure how much time the packets in ``arr`` cover, segment by
			
 
				+        # segment.
			
 
				+        #
			
 
				+        # ``arr`` is walked in adjacent pairs.  A pair whose ``index`` values
			
 
				+        # are not consecutive closes the current segment and opens a new one.
			
 
				+        # Each segment is credited with the time from its first packet,
			
 
				+        # pulled back by min(delta, Sample.lookback), up to its last packet.
			
 
				+        #
			
 
				+        # Returns (time_spent, ratio): ``time_spent`` is the sum of the
			
 
				+        # segment durations, ``ratio`` is the number of consecutive pairs
			
 
				+        # divided by ``time_spent`` (0 when ``time_spent`` is 0).  An empty
			
 
				+        # ``arr`` gives (0.0, 0.0).
			
 
				+        #
			
 
				+        # NOTE(review): the caller stores ``ratio`` under "average_iat", but
			
 
				+        # pairs / time is a rate rather than an inter-arrival time -- confirm
			
 
				+        # which one is intended.
			
 
				+        # NOTE(review): this reads the class attribute Sample.lookback while
			
 
				+        # the packet loop reads self.lookback -- confirm both are set.
			
 
				+        if not arr:
			
 
				+            return (0.0, 0.0)
			
 
				+        else:
			
 
				+            time_spent = 0.0
			
 
				+            p_in_seg = 0
			
 
				+            # Start of the first segment: first packet time minus the gap
			
 
				+            # that preceded it, capped at the lookback.
			
 
				+            start = Sample.__packet_time(arr[0]) - min(arr[0].delta, Sample.lookback)
			
 
				+            for prev, cur in window(arr):
			
 
				+                if not Sample.__is_continuous(prev, cur):
			
 
				+                    # Index gap: close the segment ending at prev and open
			
 
				+                    # a new one at cur.
			
 
				+                    time_spent += Sample.__packet_time(prev) - start
			
 
				+                    start = Sample.__packet_time(cur) - min(cur.delta, Sample.lookback)
			
 
				+                else:
			
 
				+                    # Helps deal with "Lone Wolf" packets
			
 
				+                    # (only consecutive pairs are counted, so an isolated
			
 
				+                    # packet adds no pair).
			
 
				+                    p_in_seg += 1
			
 
				+            # Close the last, still open, segment.
			
 
				+            time_spent += Sample.__packet_time(arr[-1]) - start
			
 
				+            return (time_spent, p_in_seg / time_spent if time_spent != 0 else 0)
			
 
				+    
			
 
				+    def __extract_time_stats(self, pcap):
			
 
				+        # Store whole-sample timing values on ``self`` via item assignment:
			
 
				+        #   "total_time"  - time of the last packet minus time of the first
			
 
				+        #   "average_iat" - that same span divided by len(pcap)
			
 
				+        #
			
 
				+        # NOTE(review): pcap[0] raises IndexError for an empty sequence --
			
 
				+        # confirm callers never pass one.
			
 
				+        # NOTE(review): N packets have N - 1 gaps, yet the span is divided
			
 
				+        # by N -- confirm this is the intended definition.
			
 
				         start = Sample.__packet_time(pcap[0])
			
 
				         end = Sample.__packet_time(pcap[-1])
			
 
				+        self["total_time"] = end - start
			
 
				         self["average_iat"] = (end - start) / len(pcap)
			
 
				 
			
 
				     def __is_valid_prefix(pre):