Browse Source

Parsed iam into MNIST-like data format

Tom Flucke 9 năm trước cách đây
mục cha
commit
fd100490ec
2 tập tin đã thay đổi với 52 bổ sung0 xóa
  1. BIN
      iamDataset.obj
  2. 52 0
      png2Vect2.py

BIN
iamDataset.obj


+ 52 - 0
png2Vect2.py

@@ -0,0 +1,52 @@
+########################################################################################
+# Author: Thomas Flucke
+# Date: 2017-05-13
+
+# Abreviations:
+# vect = Vector
+# ANN  = Artifical Neural Network
+# corr  = Correct version
+
+########################################################################################
+# Set up the png library
+
+import os
+import scipy.ndimage
+import itertools
+
+DATA_FOLDER = "pngChars/Hnd/Img/Sample%03d"
+IMG_TEMPLATE = "pngChars/Hnd/Img/Sample%03d/%s"
+
+class Datum:
+    def __init__(self, label, img):
+        self.label = [0] * (10 + 26 + 26)
+        self.label[label - 1] = 1
+        self.img = img
+
+class IAM:
+    def __init__(self):
+        print "Building dataset..."
+        self.train = []
+        self.test = []
+        for x in range(1, (10 + 26 + 26) + 1):
+            print "Preparing sample %d..." % x
+            for f in os.listdir(DATA_FOLDER % x):
+                img = scipy.ndimage.imread(IMG_TEMPLATE % (x, f), True)
+                img = scipy.misc.imresize(img, 0.03)
+                img = list(itertools.chain.from_iterable(img))
+                if len(self.test) < (5 * x):
+                    self.test.append(Datum(x, img))
+                else:
+                    self.train.append(Datum(x, img))
+
+iam = IAM()
+
+print "Test Points: %d" % len(iam.test)
+print "Train Points: %d" % len(iam.train)
+
+print "Saving data..."
+
+import cPickle as pickle
+
+with open("iamDataset.obj", 'wb') as output:
+    pickle.dump(iam, output, -1)