Procházet zdrojové kódy

Fixed bugs caused by empty bins (division by zero and a wrong-length return tuple in sample.py) and added a --dataset filter for collection mode (guided, free, or both).

Thomas Flucke před 6 roky
rodič
revize
ca5c8bb450

+ 8 - 5
src/feature-extractor/extractor.py

@@ -34,6 +34,9 @@ def parse_args():
     parser.add_argument('-b', '--lookback', type=float, default=3,
                         help='Seconds of lookback to determine activity state \
                         (default: 3.0s)')
+    parser.add_argument('-d', '--dataset', choices=["guided", "free", "both"],
+                        default="uniform", help='Which dataset to pull from. \
+                        (One of guided, free, or both; default: \"both\")')
     return parser.parse_args()
 
 def enter_data_dir(match_file: typing.TextIO):
@@ -51,9 +54,11 @@ def main():
     users = {}
     for line in args.match_file:
         if "pcap" in line:
-            samples = list(Sample.make_samples(*line.split(" "), args.sample_size))
-            user = samples[0].user
-            if samples:
+            samples = list(Sample.make_samples(*line.split(" "),args.sample_size))
+            if samples and ((not samples[0].is_guided and args.dataset == "free")
+                            or (samples[0].is_guided and args.dataset == "guided")
+                            or args.dataset == "both"):
+                user = samples[0].user
                 if user in users:
                     users[user].extend(samples)
                 else:
@@ -61,10 +66,8 @@ def main():
     fix_point = 99999999999
     if args.fix_sample_count:
         for u in users:
-            print(len(users[u]))
             if len(users[u]) >= args.min and len(users[u]) < fix_point:
                 fix_point = len(users[u])
-        print(fix_point)
     out = [sample
            for u in users if len(users[u]) >= args.min
            for sample in users[u][0:fix_point]]

+ 2 - 2
src/feature-extractor/sample.py

@@ -96,7 +96,7 @@ class Sample:
             "time_spent": time_spent,
             "average_iat": iat,
             "burst_count": burst_count,
-            "avg_burst_size": burst_count/len(arr)
+            "avg_burst_size": burst_count/len(arr) if arr else 0
         }
 
     def __is_continuous(p1, p2):
@@ -107,7 +107,7 @@ class Sample:
         
     def __get_time_spent(arr):
         if not arr:
-            return (0.0, 0.0)
+            return (0.0, 0.0, 0)
         else:
             time_spent = 0.0
             p_in_seg = 0