Преглед изворног кода

Added a more detailed timing breakdown and the average inter-arrival time (IAT) per usage mode.

Thomas Flucke пре 6 година
родитељ
комит
74dad6eb63
3 измењена фајла са 68 додатих и 13 уклоњених редова
  1. 13 0
      src/feature-extractor/common.py
  2. 4 3
      src/feature-extractor/extractor.py
  3. 51 10
      src/feature-extractor/sample.py

+ 13 - 0
src/feature-extractor/common.py

@@ -0,0 +1,13 @@
+from itertools import islice
+
+# From stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator
+def window(seq, n=2):
+    "Returns a sliding window (of width n) over data from the iterable"
+    "   s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...                   "
+    it = iter(seq)
+    result = tuple(islice(it, n))
+    if len(result) == n:
+        yield result
+    for elem in it:
+        result = result[1:] + (elem,)
+        yield result

+ 4 - 3
src/feature-extractor/extractor.py

@@ -17,12 +17,13 @@ def parse_args():
     parser.add_argument('-s', '--sample-size', type=int,
                         default="200", help='number of packets in a sample \
                         (default: 200)')
-    parser.add_argument('-l', '--low-act-threshold', type=float, default=1,
+    parser.add_argument('-l', '--low-act-threshold', type=float, default=1.0,
                         help='P/s below which is considered low activity \
                         (default: 1.0 P/s)')
-    parser.add_argument('-i', '--high-act-threshold', type=float, default=3,
+    # Average typing speed is 3 keys/second
+    parser.add_argument('-i', '--high-act-threshold', type=float, default=2.7,
                         help='P/s above which is considered high activity \
-                        (default: 3.0 P/s)')
+                        (default: 2.75 P/s)')
     parser.add_argument('-b', '--lookback', type=float, default=3,
                         help='Seconds of lookback to determine activity state \
                         (default: 3.0s)')

+ 51 - 10
src/feature-extractor/sample.py

@@ -1,12 +1,14 @@
-import typing
 import pyshark
 from datetime import datetime
+import typing
 from typing import List
+from common import window
 
 class Sample:
     EPOCH = datetime(1970, 1, 1)
     TIME_FMT = '%Y-%m-%d %H:%M:%S.%f'
     FILTER = "tcp.flags.push == 1 && tcp.dstport == 22" # len % 8 == 6
+    EPSIOLON = 0.0000000000001
 
     def make_samples(keylog: typing.TextIO,
                      pcap: typing.BinaryIO,
@@ -40,7 +42,7 @@ class Sample:
         self.__general = {}
         self.__extract_tag(keylog)
         self.__extract_activity_stats(packets)
-        self.__extract_packet_stats(packets)
+        self.__extract_time_stats(packets)
 
     def __extract_tag(self, keylog: typing.TextIO):
         import os
@@ -53,19 +55,34 @@ class Sample:
         high_activity = []
         mid_activity = []
         low_activity = []
+        last_bin = low_activity
+        cur_bin = None
+        dead_time = 0.0
         q = []
+        i = 0
         for p in packets:
             ptime = Sample.__packet_time(p)
+            p.index = i
+            i += 1
+            if q:
+                p.delta = ptime - Sample.__packet_time(q[-1])
+            else:
+                p.delta = 0.0
             q.append(p)
+            while Sample.__packet_time(q[0]) + self.lookback < ptime:
+                q = q[1:]
             rate = float(len(q)) / self.lookback
             if rate < self.low_act_threshold:
-                low_activity.append(q)
+                cur_bin = low_activity
             elif rate < self.high_act_threshold:
-                mid_activity.append(q)
+                cur_bin = mid_activity
             else:
-                high_activity.append(q)
-            while Sample.__packet_time(q[0]) + self.lookback < ptime:
-                q = q[1:]
+                cur_bin = high_activity
+            if last_bin != cur_bin:
+                dead_time += max(p.delta - Sample.lookback, 0)
+            cur_bin.append(p)
+            last_bin = cur_bin
+        self["dead_time"] = dead_time
         self.__activities = {
             "high": Sample.__count_activity_stats(high_activity),
             "mid": Sample.__count_activity_stats(mid_activity),
@@ -73,13 +90,37 @@ class Sample:
         }
 
     def __count_activity_stats(arr):
+        (time_spent, iat) = Sample.__get_time_spent(arr)
         return {
-            "total_packets": len(arr)
+            "total_packets": len(arr),
+            "time_spent": time_spent,
+            "average_iat": iat
         }
-        
-    def __extract_packet_stats(self, pcap):
+
+    def __is_continuous(p1, p2):
+        return int(p1.index) + 1 == int(p2.index)
+    
+    def __get_time_spent(arr):
+        if not arr:
+            return (0.0, 0.0)
+        else:
+            time_spent = 0.0
+            p_in_seg = 0
+            start = Sample.__packet_time(arr[0]) - min(arr[0].delta, Sample.lookback)
+            for prev, cur in window(arr):
+                if not Sample.__is_continuous(prev, cur):
+                    time_spent += Sample.__packet_time(prev) - start
+                    start = Sample.__packet_time(cur) - min(cur.delta, Sample.lookback)
+                else:
+                    # Helps deal with "Lone Wolf" packets
+                    p_in_seg += 1
+            time_spent += Sample.__packet_time(arr[-1]) - start
+            return (time_spent, p_in_seg / time_spent if time_spent != 0 else 0)
+    
+    def __extract_time_stats(self, pcap):
         start = Sample.__packet_time(pcap[0])
         end = Sample.__packet_time(pcap[-1])
+        self["total_time"] = end - start
         self["average_iat"] = (end - start) / len(pcap)
 
     def __is_valid_prefix(pre):