瀏覽代碼

Added serious eats parser.

Found an issue with "Sample" foods being included in the parser but not
being parsed correctly.

Temporary fix is to just not attempt to parse it.
Thomas Flucke 2 年之前
父節點
當前提交
18ebb7541e

+ 7 - 2
fdc/shared/src/main/scala/gov/usda/nal/fdc/models/LabelNutrients.scala

@@ -17,7 +17,12 @@ case class LabelNutrients(
 )
 )
 
 
 object LabelNutrients {
 object LabelNutrients {
-  import play.api.libs.json.Json
-  implicit val floatFmt = play.api.libs.json.Reads[Float](json => (json \ "value").validate[Float])
+  import play.api.libs.json.{JsObject,Json}
+  // JSON codecs for Float fields that are wrapped in an object under the
+  // "value" key: reading takes the number from that key, writing emits an
+  // object holding the number under the same key.
+  // NOTE(review): both vals are deliberately left without an explicit type
+  // annotation, as in the original; annotating them might make each instance
+  // a candidate for the implicit lookup inside its own body -- confirm before
+  // changing.
+  implicit val floatReads = play.api.libs.json.Reads[Float] { wrapped =>
+    (wrapped \ "value").validate[Float]
+  }
+
+  implicit val floatWrites = play.api.libs.json.Writes[Float] { number =>
+    JsObject(Seq("value" -> Json.toJson(number)))
+  }
+
   implicit val fmt = Json.using[Json.WithDefaultValues].format[LabelNutrients]
   implicit val fmt = Json.using[Json.WithDefaultValues].format[LabelNutrients]
 }
 }

+ 91 - 19
server/app/com/weEat/controllers/ParserController.scala

@@ -30,7 +30,9 @@ class ParserController @Inject()(
 
 
   def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({ implicit request: AuthInfoRequest[String, Authorization] =>
   def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({ implicit request: AuthInfoRequest[String, Authorization] =>
     val url = request.body
     val url = request.body
-    _findParser(url).fold(Future.successful(NotFound(s"No parser available for $url."))) { (parser) =>
+    _findParser(url).fold(
+      Future.successful(NotFound(s"No parser available for $url."))
+    ) { (parser) =>
       val doc = _browser.get(url)
       val doc = _browser.get(url)
       val title = doc >> parser.titleExtractor
       val title = doc >> parser.titleExtractor
       val servings = doc >> parser.servingExtractor
       val servings = doc >> parser.servingExtractor
@@ -61,23 +63,27 @@ class ParserController @Inject()(
 
 
   private def _findParser(url: String): Option[Parser] = {
   private def _findParser(url: String): Option[Parser] = {
     val host = new java.net.URL(url).getAuthority()
     val host = new java.net.URL(url).getAuthority()
-    val hostNoWWW = if (host.startsWith("www.")) host.substring("www.".length) else host
+    val hostNoWWW =
+      if (host.startsWith("www.")) host.substring("www.".length) else host
     Map(
     Map(
       ("epicurious.com" -> Parser.epicurious),
       ("epicurious.com" -> Parser.epicurious),
       ("mccormick.com" -> Parser.mccormick),
       ("mccormick.com" -> Parser.mccormick),
       ("recipetineats.com" -> Parser.recipeTinEats),
       ("recipetineats.com" -> Parser.recipeTinEats),
       ("mamalovestocook.com" -> Parser.recipeTinEats),
       ("mamalovestocook.com" -> Parser.recipeTinEats),
-      ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction)
+      ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction),
+      ("seriouseats.com" -> Parser.seriousEats)
     ).get(hostNoWWW)
     ).get(hostNoWWW)
   }
   }
 
 
-  private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
+  private def _guessFoodFromStr(
+    foodLine: String
+  ): Future[Ingredient.IngredientId] = {
     import gov.usda.nal.fdc.models.DataType._
     import gov.usda.nal.fdc.models.DataType._
     usdaController.fdc.getFoodsSearch(foodLine
     usdaController.fdc.getFoodsSearch(foodLine
       .filter(_ <= 0x7f)
       .filter(_ <= 0x7f)
       .filterNot(_ == ':')
       .filterNot(_ == ':')
       .filterNot(_ == '/'), Seq(
       .filterNot(_ == '/'), Seq(
-      Foundation, Survey, SRLegacy
+      Branded, Foundation, SRLegacy
     ), pageSize = Some(10))().flatMap({ (fdcResult) =>
     ), pageSize = Some(10))().flatMap({ (fdcResult) =>
       Future.sequence(
       Future.sequence(
         fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
         fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
@@ -110,7 +116,9 @@ object Parser {
     text(".main-title .count").map(_.toFloatOption),
     text(".main-title .count").map(_.toFloatOption),
     Some(text(".prep_time .first_content")),
     Some(text(".prep_time .first_content")),
     cookTimeExtractor = Some(text(".ingredients .first_content")),
     cookTimeExtractor = Some(text(".ingredients .first_content")),
-    ingredientExtractor = texts(".recipe-about-list li").map(_.map(_parseIngredient _)),
+    ingredientExtractor = texts(".recipe-about-list li").map(
+      _.map(_parseIngredient _)
+    ),
     texts(".instructions-main span.para")
     texts(".instructions-main span.para")
   )
   )
 
 
@@ -120,7 +128,9 @@ object Parser {
       .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
       .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
     None,
     None,
     None,
     None,
-    texts("""div[data-testid="IngredientList"] > div > div""").map(_.map(_parseIngredient _)),
+    texts("""div[data-testid="IngredientList"] > div > div""").map(
+      _.map(_parseIngredient _)
+    ),
     texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
     texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
   )
   )
 
 
@@ -162,21 +172,79 @@ object Parser {
       .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
       .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
     Some(text("span.tasty-recipes-prep-time")),
     Some(text("span.tasty-recipes-prep-time")),
     Some(text("span.tasty-recipes-cook-time")),
     Some(text("span.tasty-recipes-cook-time")),
-    elementList("div.tasty-recipes-ingredients-body > ul > li").map(_.map({(listItem) => (
-      ((listItem >?> elementList("span"))
-        .map(_.last)
-        .fold(0.0f)((elm: Element) => (elm >?> attr("data-amount")).fold(0.0f)(_.toFloat))
-      ),
-      (listItem >?> elementList("span"))
-        .map(_.last)
-        .fold[MeasureUnit](Gram)((elm: Element) => (elm >?> attr("data-unit")).flatMap(MeasureUnit.guessUnit _).getOrElse(Count)),
-      listItem >> text("strong")
-    )})),
+    elementList("div.tasty-recipes-ingredients-body > ul > li").map(
+      _.map({(listItem) => (
+        ((listItem >?> elementList("span"))
+          .map(_.last)
+          .fold(0.0f)((elm: Element) =>
+            (elm >?> attr("data-amount"))
+              .fold(0.0f)(_.toFloat)
+          )
+        ),
+        (listItem >?> elementList("span"))
+          .map(_.last)
+          .fold[MeasureUnit](Gram)((elm: Element) =>
+            (elm >?> attr("data-unit"))
+              .flatMap(MeasureUnit.guessUnit _)
+              .getOrElse(Count)
+          ),
+        listItem >> text("strong")
+      )})
+    ),
     texts("div.tasty-recipes-instructions-body > ol > li")
     texts("div.tasty-recipes-instructions-body > ol > li")
   )
   )
 
 
+  // Parser definition for Serious Eats recipe pages (registered under
+  // "seriouseats.com" in the host lookup).
+  // NOTE(review): arguments are positional; their roles (title, servings,
+  // prep time, cook time, ingredients, instruction steps) are inferred from
+  // the sibling parsers -- confirm against the Parser definition.
+  val seriousEats = Parser(
+    text("h2.recipe-decision-block__title"),
+    // Servings: the first run of digits in the serving text, as a Float;
+    // None when the text contains no digits.
+    text("div.recipe-serving > span > span.meta-text__data")
+      .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
+    //text("div.recipe-yield > span > span.meta-text__data")
+    Some(text("div.prep-time > span > span.meta-text__data")),
+    // No extractor is supplied in this slot; the commented-out selector is a
+    // leftover from another parser.
+    None, //Some(text("span.tasty-recipes-cook-time")),
+    // Each ingredient paragraph becomes an (amount, unit, name) tuple.
+    elementList("ul.structured-ingredients__list > li > p").map(
+      _.map({(p) => (
+        // Amount: text of the last span carrying a data-ingredient-quantity
+        // attribute, parsed by _parseFraction; 0.0 when absent or unparsable.
+        ((p >?> elementList("span"))
+          .flatMap(_
+            .filter((s) => (s >?> attr("data-ingredient-quantity")).isDefined)
+            .lastOption
+            .map(_ >> text)
+          ).flatMap(_parseFraction _)
+          .getOrElse(0.0f)
+        ),
+        // Unit: text of the last span carrying a data-ingredient-unit
+        // attribute, mapped through MeasureUnit.guessUnit; Count when the
+        // span is missing or the unit is not recognised.
+        ((p >?> elementList("span"))
+          .flatMap(_
+            .filter((s) => (s >?> attr("data-ingredient-unit")).isDefined)
+            .lastOption
+            .map(_ >> text)
+          ).flatMap(MeasureUnit.guessUnit _)
+          .getOrElse(Count)
+        ),
+        // Name: text of the first span carrying a data-ingredient-name
+        // attribute, falling back to the text of the whole paragraph.
+        ((p >?> elementList("span"))
+          .flatMap(_
+            .filter((s) => (s >?> attr("data-ingredient-name")).isDefined)
+            .headOption
+          ).getOrElse(p) >> text
+        )
+      )})
+    ),
+    texts("div.structured-project__steps_1-0 > ol > li > p")
+  )
+
+  /** Parses an ingredient quantity string into a Float.
+    *
+    * Accepted forms (surrounding whitespace is ignored):
+    *  - a simple fraction, e.g. "1/2" => 0.5
+    *  - a mixed fraction with whitespace between the whole part and the
+    *    fraction, e.g. "1 1/2" => 1.5
+    *  - anything `String.toFloatOption` accepts, e.g. "3" or "2.5"
+    *
+    * @param fractionLine the raw quantity text
+    * @return the parsed amount, or None when the text is not a recognised
+    *         number or the fraction has a zero denominator
+    */
+  private def _parseFraction(fractionLine: String): Option[Float] = {
+    val fractionPattern = raw"(\d+)/(\d+)".r
+    // The whole part and the fraction are separated by whitespace. A `\w+`
+    // separator can never match a space, so "1 1/2" would fall through to
+    // toFloatOption and be rejected.
+    val mixedFractionPattern = raw"(\d+)\s+(\d+)/(\d+)".r
+    fractionLine.trim match {
+      // The guards reject a zero denominator, which would otherwise produce
+      // Infinity/NaN; such input falls through to the last case and is None.
+      case fractionPattern(numerator, denominator)
+          if denominator.toFloat != 0.0f =>
+        Some(numerator.toFloat/denominator.toFloat)
+      case mixedFractionPattern(whole, numerator, denominator)
+          if denominator.toFloat != 0.0f =>
+        Some(whole.toFloat + numerator.toFloat/denominator.toFloat)
+      case plainNumber => plainNumber.toFloatOption
+    }
+  }
 
 
-  private def _parseIngredient(ingredientLine: String): (Float, MeasureUnit, String) = {
+  private def _parseIngredient(
+    ingredientLine: String
+  ): (Float, MeasureUnit, String) = {
     val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
     val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
     val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
     val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
 
 
@@ -184,7 +252,11 @@ object Parser {
       case numberPattern(amount, unit, rest) =>
       case numberPattern(amount, unit, rest) =>
         (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
         (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
       case fractionPattern(numerator, denominator, unit, rest) =>
       case fractionPattern(numerator, denominator, unit, rest) =>
-        (numerator.toFloat/denominator.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
+        (
+          numerator.toFloat/denominator.toFloat,
+          MeasureUnit.guessUnit(unit).getOrElse(Count),
+          rest
+        )
       case noUnitLine =>
       case noUnitLine =>
         (1, Count, noUnitLine)
         (1, Count, noUnitLine)
     }
     }

+ 4 - 3
webClient/src/main/scala/com/weEat/models/RecipeVar.scala

@@ -135,7 +135,7 @@ case class RecipeVar(recipe: Option[RecipeNode])
     { _ => Nil},
     { _ => Nil},
     Some({ str: String =>
     Some({ str: String =>
       USDAController.getFoodsSearch(str, Seq(
       USDAController.getFoodsSearch(str, Seq(
-        Foundation, Survey, SRLegacy
+        Branded, Foundation, SRLegacy
       ).map(_.toString))().map(_.foods.map(USDANodeNoId.fromSearchResult))
       ).map(_.toString))().map(_.foods.map(USDANodeNoId.fromSearchResult))
     }),
     }),
     templates = Some(Templates({(x: USDANodeNoId) => x.name}).copy(
     templates = Some(Templates({(x: USDANodeNoId) => x.name}).copy(
@@ -202,7 +202,8 @@ case class RecipeVar(recipe: Option[RecipeNode])
       div(cls := "row",
       div(cls := "row",
         div(cls := "col-12 input-group",
         div(cls := "col-12 input-group",
           _ingredientInput(Signal.fromFuture(ing.food)) { (e) =>
           _ingredientInput(Signal.fromFuture(ing.food)) { (e) =>
-            e.selectable.map(_.data).foreach({ (node) => id.set(Ingredient.IngredientId.fromFoodNode(node)) })
+            e.selectable.map(_.data).foreach({ (node) =>
+              id.set(Ingredient.IngredientId.fromFoodNode(node)) })
           },
           },
           amountIn,
           amountIn,
           unitIn
           unitIn
@@ -383,7 +384,7 @@ case class RecipeVar(recipe: Option[RecipeNode])
             ul(
             ul(
               listStyleType := "none",
               listStyleType := "none",
               paddingLeft := "0",
               paddingLeft := "0",
-              children <-- _ingredients.signal.splitByIndex {
+              children <-- ingredients.splitByIndex {
                 case (idx, _, ingredientStream) =>
                 case (idx, _, ingredientStream) =>
                   _presentFoodNode(idx)(ingredientStream)
                   _presentFoodNode(idx)(ingredientStream)
               }
               }

+ 1 - 1
webClient/src/main/scala/com/weEat/views/UsdaImporter.scala

@@ -60,7 +60,7 @@ object UsdaImporter extends View[Option[String]] {
     val searchBar: SearchBar[Seq[Signal[Option[Seq[SearchResultFood]]]]] =
     val searchBar: SearchBar[Seq[Signal[Option[Seq[SearchResultFood]]]]] =
       SearchBar((term) =>
       SearchBar((term) =>
         USDAController.getFoodsSearch(term, Seq(
         USDAController.getFoodsSearch(term, Seq(
-          Foundation, Survey, SRLegacy
+          Branded, Foundation, SRLegacy
         ).map(_.toString), pageSize = Some(SEARCH_PAGE_SIZE))().map {
         ).map(_.toString), pageSize = Some(SEARCH_PAGE_SIZE))().map {
           case SearchResult(criteria, n, cur, tot, baseList) =>
           case SearchResult(criteria, n, cur, tot, baseList) =>
             Val(Some(baseList)) +:
             Val(Some(baseList)) +: