# png2Vect2.py

########################################################################################
# Author: Thomas Flucke
# Date: 2017-05-13
# Abbreviations:
# vect = Vector
# ANN = Artificial Neural Network
# corr = Correct version
########################################################################################
# Set up the png library
import itertools
import os

import scipy.misc
import scipy.ndimage
  13. DATA_FOLDER = "pngChars/Hnd/Img/Sample%03d"
  14. IMG_TEMPLATE = "pngChars/Hnd/Img/Sample%03d/%s"
  15. class Datum:
  16. def __init__(self, label, img):
  17. self.label = [0] * (10 + 26 + 26)
  18. self.label[label - 1] = 1
  19. self.img = img
  20. class IAM:
  21. def __init__(self):
  22. print "Building dataset..."
  23. self.train = []
  24. self.test = []
  25. for x in range(1, (10 + 26 + 26) + 1):
  26. print "Preparing sample %d..." % x
  27. for f in os.listdir(DATA_FOLDER % x):
  28. img = scipy.ndimage.imread(IMG_TEMPLATE % (x, f), True)
  29. img = scipy.misc.imresize(img, 0.03)
  30. img = list(itertools.chain.from_iterable(img))
  31. if len(self.test) < (5 * x):
  32. self.test.append(Datum(x, img))
  33. else:
  34. self.train.append(Datum(x, img))
  35. iam = IAM()
  36. print "Test Points: %d" % len(iam.test)
  37. print "Train Points: %d" % len(iam.train)
  38. print "Saving data..."
  39. import cPickle as pickle
  40. with open("iamDataset.obj", 'wb') as output:
  41. pickle.dump(iam, output, -1)