소스 검색

Merge branch 'master' of https://git.tflucke.name/tflucke/SSH-Master-Thesis

Tom Flucke 6 년 전
부모
커밋
ab5dcc22fd
1개의 변경된 파일, 8개의 추가 및 3개의 삭제
  1. 8 3
      src/feature-extractor/extractor.py

+ 8 - 3
src/feature-extractor/extractor.py

@@ -1,6 +1,5 @@
 #!/usr/bin/python3
-import typing
-import sys
+import typing, sys, os
 
 def parse_args():
     import argparse
@@ -43,10 +42,12 @@ def parse_args():
     parser.add_argument('-d', '--dataset', choices=["guided", "free", "both"],
                         default="uniform", help='Which dataset to pull from. \
                         (One of guided, free, or both; default: \"both\")')
+    parser.add_argument('-u', '--min-users', type=int, default="20",
+                        help='Minimum number of users to consider a valid \
+                        dataset. (default: 20)')
     return parser.parse_args()
 
 def enter_data_dir(match_file: typing.TextIO):
-    import os
     if match_file is not sys.stdin:
         os.chdir(os.path.dirname(match_file.name))
 
@@ -71,6 +72,10 @@ def main():
                     users[user].extend(samples)
                 else:
                     users[user] = samples
+    if len(users) < args.min_users:
+        print("Not enough data to build valid dataset.", file=sys.stderr)
+        os.remove(args.outfile)
+        return
     fix_point = 99999999999
     if args.fix_sample_count:
         for u in users: