Browse Source

Extractor can now extract features based on usage mode.

Thomas Flucke 6 năm trước cách đây
mục cha
commit
9b24b6a340
2 tập tin đã thay đổi với 57 bổ sung và 6 xóa
  1. 12 0
      src/feature-extractor/extractor.py
  2. 45 6
      src/feature-extractor/sample.py

+ 12 - 0
src/feature-extractor/extractor.py

@@ -14,6 +14,15 @@ def parse_args():
     parser.add_argument('-o', '--outfile', type=argparse.FileType('wb'),
                         default="features.plo", help='Where to save the " \
                         "extracted features (default: features.plo)')
+    parser.add_argument('-l', '--low-act-threshold', type=float, default=1,
+                        help='P/s below which is considered low activity \
+                        (default: 3.0 P/s)')
+    parser.add_argument('-i', '--high-act-threshold', type=float, default=3,
+                        help='P/s above which is considered high activity \
+                        (default: 3.0 P/s)')
+    parser.add_argument('-b', '--lookback', type=float, default=3,
+                        help='Seconds of lookback to determine activity state \
+                        (default: 3.0s)')
     return parser.parse_args()
 
 def enter_data_dir(match_file: typing.TextIO):
@@ -25,6 +34,9 @@ def main():
     args = parse_args()
     from sample import Sample
     enter_data_dir(args.match_file)
+    Sample.set_activity_thresholds(args.low_act_threshold,
+                                   args.high_act_threshold,
+                                   args.lookback)
     out = [Sample(*line.split(" ")) for line in args.match_file if "pcap" in line]
     try:
         import cPickle as pickle

+ 45 - 6
src/feature-extractor/sample.py

@@ -5,11 +5,26 @@ from datetime import datetime
 class Sample:
     EPOCH = datetime(1970, 1, 1)
     TIME_FMT = '%Y-%m-%d %H:%M:%S.%f'
-    
+
+    # Boundaries measured in packets/second
+    @staticmethod
+    def set_activity_thresholds(lower_bound: float, upper_bound: float,
+                                lookback: float):
+        """Store the class-wide activity classification settings on Sample.
+
+        lower_bound: rate (packets/second) below which activity is low.
+        upper_bound: rate (packets/second) above which activity is high.
+        lookback: window length used when judging activity; must be positive.
+
+        Raises ValueError unless 0 < lower_bound < upper_bound and
+        0 < lookback.
+        """
+        # Explicit checks rather than assert: assert statements are removed
+        # when Python runs with -O, which would let bad settings through.
+        if not 0 < lower_bound < upper_bound:
+            raise ValueError(
+                "expected 0 < lower_bound < upper_bound, got %r and %r"
+                % (lower_bound, upper_bound))
+        if not 0 < lookback:
+            raise ValueError("lookback must be positive, got %r" % (lookback,))
+        Sample.high_act_threshold = upper_bound
+        Sample.low_act_threshold = lower_bound
+        Sample.lookback = lookback
+
+    @staticmethod
+    def packet_time(p):
+        """Return p.time as seconds elapsed since Sample.EPOCH.
+
+        p.time is parsed with Sample.TIME_FMT. Declared static because the
+        function takes no instance; without the decorator a call through an
+        instance would pass that instance in as p.
+        """
+        stamp = datetime.strptime(p.time, Sample.TIME_FMT)
+        return (stamp - Sample.EPOCH).total_seconds()
+        
     def __init__(self, keylog: typing.TextIO, pcap: typing.BinaryIO):
+        # Build one sample: run extract_tag on keylog, load the capture named
+        # by pcap, then compute activity and packet statistics from it.
+        # NOTE(review): pcap has .strip() called on it and the caller in
+        # extractor.py passes the pieces of line.split(" "), so both
+        # arguments look like strings rather than open files -- confirm and
+        # correct the annotations.
         self.extract_tag(keylog)
         f = pyshark.FileCapture(pcap.strip(), only_summaries=True)
         f.load_packets()
+        # NOTE(review): the capture is not closed anywhere in this method --
+        # confirm whether it should be released after the stats are taken.
+        self.extract_activity_stats(f)
         self.extract_packet_stats(f)
 
     def extract_tag(self, keylog: typing.TextIO):
@@ -18,11 +33,35 @@ class Sample:
         self.is_guided = os.path.basename(dir_guided) == "y"
         dir_user = os.path.dirname(dir_guided)
         self.user = os.path.basename(dir_user)
+
+    def extract_activity_stats(self, keylog):
+        """Classify each packet by the recent packet rate and store counts.
+
+        keylog: iterable of packets; despite the name, __init__ passes the
+            loaded capture here. Each packet needs the .time string that
+            Sample.packet_time parses.
+
+        For every packet, the packets no older than self.lookback relative to
+        it form a window. The window's rate (packet count / self.lookback) is
+        compared with self.low_act_threshold and self.high_act_threshold, and
+        a snapshot of the window is filed under "low", "mid" or "high".
+        self.activities is set to the count_activity_stats result per level.
+        """
+        from collections import deque
+
+        buckets = {"high": [], "mid": [], "low": []}
+        window = deque()   # packets currently inside the lookback window
+        stamps = deque()   # their parsed times, kept in step with window
+        for p in keylog:
+            ptime = Sample.packet_time(p)
+            window.append(p)
+            stamps.append(ptime)
+            # Expire old packets before classifying so the rate only reflects
+            # the lookback window. The newest packet always survives, so the
+            # deques never run empty here.
+            while stamps[0] + self.lookback < ptime:
+                stamps.popleft()
+                window.popleft()
+            # The thresholds are rates, so turn the packet count into
+            # packets per unit of lookback time before comparing.
+            rate = len(window) / self.lookback
+            if rate < self.low_act_threshold:
+                level = "low"
+            elif rate > self.high_act_threshold:
+                level = "high"
+            else:
+                level = "mid"
+            # Store a copy: the live window keeps changing on later packets.
+            buckets[level].append(list(window))
+        self.activities = {
+            "high": Sample.count_activity_stats(buckets["high"]),
+            "mid": Sample.count_activity_stats(buckets["mid"]),
+            "low": Sample.count_activity_stats(buckets["low"])
+        }
+
+    @staticmethod
+    def count_activity_stats(arr):
+        """Summarise one activity bucket.
+
+        arr: sized collection of the entries filed under one activity level.
+        Returns a dict whose "total packets" value is len(arr).
+        Declared static because the function takes no instance.
+        """
+        return {
+            "total packets": len(arr)
+        }
         
     def extract_packet_stats(self, pcap):
+        # Sets self.average_iat to the time between the first and last packet
+        # of pcap (as returned by Sample.packet_time) divided by len(pcap).
-        start = (datetime.strptime(pcap[0].time, self.TIME_FMT) - self.EPOCH)\
-              .total_seconds()
-        end = (datetime.strptime(pcap[-1].time, self.TIME_FMT) - self.EPOCH)\
-              .total_seconds()
+        start = Sample.packet_time(pcap[0])
+        end = Sample.packet_time(pcap[-1])
+        # NOTE(review): the chained assignment names the same attribute
+        # twice; a single target would do. Also, n packets have n - 1 gaps --
+        # confirm that dividing by len(pcap) is the intended average.
         self.average_iat = self.average_iat = (end - start) / len(pcap)
-