Просмотр исходного кода

Added darngoodveggies and dimitrasdishes.com to parser.

Thomas Flucke 2 года назад
Родитель
Коммит
72cf59a34b
1 изменённый файл: 54 добавлено и 10 удалено
  1. 54 10
      server/app/com/weEat/controllers/ParserController.scala

+ 54 - 10
server/app/com/weEat/controllers/ParserController.scala

@@ -71,9 +71,11 @@ class ParserController @Inject()(
       ("recipetineats.com" -> Parser.recipeTinEats),
       ("mamalovestocook.com" -> Parser.recipeTinEats),
       ("soulfullymade.com" -> Parser.recipeTinEats),
-      ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction),
+      ("sallysbakingaddiction.com" -> Parser.tastyRecipes),
+      ("darngoodveggies.com" -> Parser.tastyRecipes),
       ("seriouseats.com" -> Parser.seriousEats),
       ("greatist.com" -> Parser.greatist),
+      ("dimitrasdishes.com" -> Parser.dimitrasDishes)
     ).get(hostNoWWW)
   }
 
@@ -81,10 +83,11 @@ class ParserController @Inject()(
     foodLine: String
   ): Future[Ingredient.IngredientId] = {
     import gov.usda.nal.fdc.models.DataType._
-    usdaController.fdc.getFoodsSearch(foodLine
+    val foodLineFiltered = foodLine
       .filter(_ <= 0x7f)
       .filterNot(_ == ':')
-      .filterNot(_ == '/'), Seq(
+      .filterNot(_ == '/')
+    usdaController.fdc.getFoodsSearch(foodLineFiltered, Seq(
       // Branded, 
         Foundation, SRLegacy
     ), pageSize = Some(10))().flatMap({ (fdcResult) =>
@@ -170,7 +173,7 @@ object Parser {
     texts("div.wprm-recipe-instruction-text")
   )
 
-  val sallysBakingAddiction = Parser(
+  val tastyRecipes = Parser(
     text("h2.tasty-recipes-title"),
     text("span.tasty-recipes-yield")
       .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
@@ -192,7 +195,7 @@ object Parser {
               .flatMap(MeasureUnit.guessUnit _)
               .getOrElse(Count)
           ),
-        listItem >> text("strong")
+        (listItem >?> text("strong")).getOrElse(listItem.ownText)
       )})
     ),
     texts("div.tasty-recipes-instructions-body > ol > li")
@@ -227,7 +230,7 @@ object Parser {
           .flatMap(_
             .filter((s) => (s >?> attr("data-ingredient-name")).isDefined)
             .headOption
-          ).getOrElse(p) >> text
+          ).getOrElse(p).ownText
         )
       )})
     ),
@@ -259,9 +262,43 @@ object Parser {
     texts("article.article-body > ol > li")
   )
 
+  /** Parser for dimitrasdishes.com recipe pages (`mv-create-*` markup).
+    *
+    * Arguments are positional: the first selector reads the recipe title and
+    * the second reads the yield as an optional float. The two `None` values
+    * are fields this site does not supply (NOTE(review): confirm which
+    * fields of `Parser` they correspond to).
+    *
+    * Each ingredient line is normalised before it is handed to
+    * `_parseIngredient`: the standalone word "and" is removed, Unicode
+    * vulgar-fraction glyphs are spelled out as ASCII fractions, "ñ" is
+    * replaced by "n", and surrounding whitespace is trimmed.
+    */
+  val dimitrasDishes = Parser(
+    text("h2.mv-create-title-primary"),
+    text("div.mv-create-time-yield > span").map(_.toFloatOption),
+    None,
+    None,
+    texts("div.mv-create-ingredients > ul > li").map(
+      _.map(_
+        // Remove "and" only as a whole word (e.g. "1 and 1/2 cups"); a plain
+        // substring replace would also mangle "candied", "handful", etc.
+        .replaceAll("\\band\\b", "")
+        .replaceAll("\u00BD", "1/2")
+        .replaceAll("\u00BC", "1/4")
+        .replaceAll("\u00BE", "3/4")
+        .replaceAll("\u2150", "1/7")
+        .replaceAll("\u2151", "1/9")
+        .replaceAll("\u2152", "1/10")
+        .replaceAll("\u2153", "1/3")
+        .replaceAll("\u2154", "2/3")
+        .replaceAll("\u2155", "1/5")
+        .replaceAll("\u2156", "2/5")
+        .replaceAll("\u2157", "3/5")
+        .replaceAll("\u2158", "4/5")
+        .replaceAll("\u2159", "1/6")
+        .replaceAll("\u215A", "5/6")
+        .replaceAll("\u215B", "1/8")
+        .replaceAll("\u215C", "3/8")
+        .replaceAll("\u215D", "5/8")
+        .replaceAll("\u215E", "7/8")
+        // U+215F is the bare "fraction numerator one" glyph; the denominator
+        // digits follow it in the text.
+        .replaceAll("\u215F", "1/")
+        .replaceAll("\u00F1", "n")
+        .trim
+      ).map(_parseIngredient _)
+    ),
+    texts("div.mv-create-instructions > ol > li")
+  )
+
   private def _parseFraction(fractionLine: String) = {
-    val fractionPattern = raw"(\d+)/(\d+)".r
-    val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)".r
+    val fractionPattern = raw"(\d+)/(\d+)[\d-_]*".r
+    val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)[\d-_]*".r
     fractionLine match {
       case fractionPattern(numerator, denominator) =>
         Some(numerator.toFloat/denominator.toFloat)
@@ -276,16 +313,23 @@ object Parser {
   ): (Float, MeasureUnit, String) = {
     val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
     val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
+    val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)\s(\w+)\s+(.+)".r
 
     ingredientLine match {
-      case numberPattern(amount, unit, rest) =>
-        (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
+      case mixedFractionPattern(whole, numerator, denominator, unit, rest) =>
+        (
+          whole.toFloat + numerator.toFloat/denominator.toFloat,
+          MeasureUnit.guessUnit(unit).getOrElse(Count),
+          rest
+        )
       case fractionPattern(numerator, denominator, unit, rest) =>
         (
           numerator.toFloat/denominator.toFloat,
           MeasureUnit.guessUnit(unit).getOrElse(Count),
           rest
         )
+      case numberPattern(amount, unit, rest) =>
+        (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
       case noUnitLine =>
         (1, Count, noUnitLine)
     }