Parcourir la source

Augmented feature extractor to take a pairing file.

Now collects ownership (user) and guided/unguided information from the keylog path for tagging.
Thomas Flucke il y a 6 ans
Parent
commit
b2347c6435
2 fichiers modifiés avec 24 ajouts et 5 suppressions
  1. 14 3
      src/feature-extractor/extractor.py
  2. 10 2
      src/feature-extractor/sample.py

+ 14 - 3
src/feature-extractor/extractor.py

@@ -1,20 +1,31 @@
 #!/usr/bin/python3
+import typing
+import sys
 
 def parse_args():
     import argparse
     parser = argparse.ArgumentParser(
         description='Extract features from pcap files.')
-    parser.add_argument('pcaps', metavar='pcaps', type=argparse.FileType('rb'),
-                        nargs='+', help='pcap from which to extract features')
+    parser.add_argument('match_file', metavar='match file',
+                        type=argparse.FileType('r'), default=sys.stdin,
+                        help='File of keylog/pcaps matchings (default: stdin)')
+    # parser.add_argument('pcaps', metavar='pcaps', type=argparse.FileType('rb'),
+    #                     nargs='+', help='pcap from which to extract features')
     parser.add_argument('-o', '--outfile', type=argparse.FileType('wb'),
                         default="features.plo", help='Where to save the " \
                         "extracted features (default: features.plo)')
     return parser.parse_args()
 
+def enter_data_dir(match_file: typing.TextIO):
+    import os
+    if match_file is not sys.stdin:
+        os.chdir(os.path.dirname(match_file.name))
+
 def main():
     args = parse_args()
     from sample import Sample
-    out = [Sample(pcap) for pcap in args.pcaps[0:1]];
+    enter_data_dir(args.match_file)
+    out = [Sample(*line.split(" ")) for line in args.match_file if "pcap" in line]
     try:
         import cPickle as pickle
     except:

+ 10 - 2
src/feature-extractor/sample.py

@@ -6,11 +6,19 @@ class Sample:
     EPOCH = datetime(1970, 1, 1)
     TIME_FMT = '%Y-%m-%d %H:%M:%S.%f'
     
-    def __init__(self, pcap: typing.BinaryIO):
-        f = pyshark.FileCapture(pcap, only_summaries=True)
+    def __init__(self, keylog: typing.TextIO, pcap: typing.BinaryIO):
+        self.extract_tag(keylog)
+        f = pyshark.FileCapture(pcap.strip(), only_summaries=True)
         f.load_packets()
         self.extract_packet_stats(f)
 
+    def extract_tag(self, keylog: typing.TextIO):
+        import os
+        dir_guided = os.path.dirname(keylog)
+        self.is_guided = os.path.basename(dir_guided) == "y"
+        dir_user = os.path.dirname(dir_guided)
+        self.user = os.path.basename(dir_user)
+        
     def extract_packet_stats(self, pcap):
         start = (datetime.strptime(pcap[0].time, self.TIME_FMT) - self.EPOCH)\
               .total_seconds()