Просмотр исходного кода

Added serious eats parser.

Found an issue with "Sample" foods being included in the parser but not
being parsed correctly.

The temporary fix is to not attempt to parse them.
Thomas Flucke 2 лет назад
Родитель
Сommit
18ebb7541e

+ 7 - 2
fdc/shared/src/main/scala/gov/usda/nal/fdc/models/LabelNutrients.scala

@@ -17,7 +17,12 @@ case class LabelNutrients(
 )
 
 /**
   * Companion holding the Play JSON codecs for `LabelNutrients`.
   *
   * Floats are not (de)serialized as bare JSON numbers here: the implicit
   * `Reads`/`Writes` below wrap every Float in an object of the form
   * `{"value": <number>}`.
   */
 object LabelNutrients {
-  import play.api.libs.json.Json
-  implicit val floatFmt = play.api.libs.json.Reads[Float](json => (json \ "value").validate[Float])
+  import play.api.libs.json.{JsObject,Json}
   // Reads a Float from the "value" field of the incoming JSON value.
   // NOTE(review): this implicit has no explicit type. If a type annotation is
   // added, the inner `validate[Float]` may resolve to this very val instead of
   // Play's built-in Float reader -- verify before annotating.
+  implicit val floatReads = play.api.libs.json.Reads[Float](json => (json \ "value").validate[Float])
+
   // Writes a Float as a JSON object with the single key "value"; mirrors
   // floatReads. The same caveat about adding a type annotation presumably
   // applies to the inner `Json.toJson(f)` -- TODO confirm.
+  implicit val floatWrites = play.api.libs.json.Writes[Float]({
+    case f: Float => JsObject(Seq("value" -> Json.toJson(f)))
+  })
+
   // Macro-derived Format for the case class, using declared default values
   // for missing fields (Json.WithDefaultValues). Presumably picks up the Float
   // codecs above for Float-typed fields; the case class fields are not visible
   // in this hunk.
   implicit val fmt = Json.using[Json.WithDefaultValues].format[LabelNutrients]
 }

+ 91 - 19
server/app/com/weEat/controllers/ParserController.scala

@@ -30,7 +30,9 @@ class ParserController @Inject()(
 
   def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({ implicit request: AuthInfoRequest[String, Authorization] =>
     val url = request.body
-    _findParser(url).fold(Future.successful(NotFound(s"No parser available for $url."))) { (parser) =>
+    _findParser(url).fold(
+      Future.successful(NotFound(s"No parser available for $url."))
+    ) { (parser) =>
       val doc = _browser.get(url)
       val title = doc >> parser.titleExtractor
       val servings = doc >> parser.servingExtractor
@@ -61,23 +63,27 @@ class ParserController @Inject()(
 
   /**
     * Looks up the site-specific [[Parser]] for a recipe URL.
     *
     * The URL's authority, with one leading "www." removed, is used as an
     * exact key into a fixed table of supported sites.
     *
     * @param url absolute URL of the recipe page
     * @return the matching parser, or None when the site is not in the table
     */
   private def _findParser(url: String): Option[Parser] = {
     // NOTE(review): java.net.URL's constructor throws MalformedURLException
     // for unparseable input, and getAuthority() can return null or include a
     // port / user-info part; none of these cases is handled here. getHost()
     // may be the intended accessor -- confirm.
     val host = new java.net.URL(url).getAuthority()
-    val hostNoWWW = if (host.startsWith("www.")) host.substring("www.".length) else host
+    val hostNoWWW =
+      if (host.startsWith("www.")) host.substring("www.".length) else host
     // The lookup is case-sensitive and matches the whole host string only.
     Map(
       ("epicurious.com" -> Parser.epicurious),
       ("mccormick.com" -> Parser.mccormick),
       ("recipetineats.com" -> Parser.recipeTinEats),
       // mamalovestocook.com is served by the recipeTinEats parser.
       ("mamalovestocook.com" -> Parser.recipeTinEats),
-      ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction)
+      ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction),
+      ("seriouseats.com" -> Parser.seriousEats)
     ).get(hostNoWWW)
   }
 
-  private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
+  private def _guessFoodFromStr(
+    foodLine: String
+  ): Future[Ingredient.IngredientId] = {
     import gov.usda.nal.fdc.models.DataType._
     usdaController.fdc.getFoodsSearch(foodLine
       .filter(_ <= 0x7f)
       .filterNot(_ == ':')
       .filterNot(_ == '/'), Seq(
-      Foundation, Survey, SRLegacy
+      Branded, Foundation, SRLegacy
     ), pageSize = Some(10))().flatMap({ (fdcResult) =>
       Future.sequence(
         fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
@@ -110,7 +116,9 @@ object Parser {
     text(".main-title .count").map(_.toFloatOption),
     Some(text(".prep_time .first_content")),
     cookTimeExtractor = Some(text(".ingredients .first_content")),
-    ingredientExtractor = texts(".recipe-about-list li").map(_.map(_parseIngredient _)),
+    ingredientExtractor = texts(".recipe-about-list li").map(
+      _.map(_parseIngredient _)
+    ),
     texts(".instructions-main span.para")
   )
 
@@ -120,7 +128,9 @@ object Parser {
       .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
     None,
     None,
-    texts("""div[data-testid="IngredientList"] > div > div""").map(_.map(_parseIngredient _)),
+    texts("""div[data-testid="IngredientList"] > div > div""").map(
+      _.map(_parseIngredient _)
+    ),
     texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
   )
 
@@ -162,21 +172,79 @@ object Parser {
       .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
     Some(text("span.tasty-recipes-prep-time")),
     Some(text("span.tasty-recipes-cook-time")),
-    elementList("div.tasty-recipes-ingredients-body > ul > li").map(_.map({(listItem) => (
-      ((listItem >?> elementList("span"))
-        .map(_.last)
-        .fold(0.0f)((elm: Element) => (elm >?> attr("data-amount")).fold(0.0f)(_.toFloat))
-      ),
-      (listItem >?> elementList("span"))
-        .map(_.last)
-        .fold[MeasureUnit](Gram)((elm: Element) => (elm >?> attr("data-unit")).flatMap(MeasureUnit.guessUnit _).getOrElse(Count)),
-      listItem >> text("strong")
-    )})),
+    elementList("div.tasty-recipes-ingredients-body > ul > li").map(
+      _.map({(listItem) => (
+        ((listItem >?> elementList("span"))
+          .map(_.last)
+          .fold(0.0f)((elm: Element) =>
+            (elm >?> attr("data-amount"))
+              .fold(0.0f)(_.toFloat)
+          )
+        ),
+        (listItem >?> elementList("span"))
+          .map(_.last)
+          .fold[MeasureUnit](Gram)((elm: Element) =>
+            (elm >?> attr("data-unit"))
+              .flatMap(MeasureUnit.guessUnit _)
+              .getOrElse(Count)
+          ),
+        listItem >> text("strong")
+      )})
+    ),
     texts("div.tasty-recipes-instructions-body > ol > li")
   )
 
   /**
     * Parser for seriouseats.com recipe pages.
     *
     * Arguments are positional; judging by the sibling parsers they appear to
     * be: title, servings, prep time, cook time, ingredients, instructions
     * (TODO confirm against the Parser case class, which is not visible here).
     */
+  val seriousEats = Parser(
+    text("h2.recipe-decision-block__title"),
     // Servings: the first run of digits found in the serving text, as a Float;
     // None when the text contains no digits.
+    text("div.recipe-serving > span > span.meta-text__data")
+      .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
+    //text("div.recipe-yield > span > span.meta-text__data")
+    Some(text("div.prep-time > span > span.meta-text__data")),
+    None, //Some(text("span.tasty-recipes-cook-time")),
     // Each ingredient <p> becomes an (amount, unit, name) tuple built from its
     // child <span> elements, selected by their data-ingredient-* attributes.
+    elementList("ul.structured-ingredients__list > li > p").map(
+      _.map({(p) => (
         // Amount: text of the last span carrying data-ingredient-quantity,
         // parsed by _parseFraction; 0.0f when absent or unparseable.
+        ((p >?> elementList("span"))
+          .flatMap(_
+            .filter((s) => (s >?> attr("data-ingredient-quantity")).isDefined)
+            .lastOption
+            .map(_ >> text)
+          ).flatMap(_parseFraction _)
+          .getOrElse(0.0f)
+        ),
         // Unit: text of the last span carrying data-ingredient-unit, mapped
         // through MeasureUnit.guessUnit; Count when absent or unrecognized.
+        ((p >?> elementList("span"))
+          .flatMap(_
+            .filter((s) => (s >?> attr("data-ingredient-unit")).isDefined)
+            .lastOption
+            .map(_ >> text)
+          ).flatMap(MeasureUnit.guessUnit _)
+          .getOrElse(Count)
+        ),
         // Name: text of the first span carrying data-ingredient-name, falling
         // back to the text of the whole paragraph.
+        ((p >?> elementList("span"))
+          .flatMap(_
+            .filter((s) => (s >?> attr("data-ingredient-name")).isDefined)
+            .headOption
+          ).getOrElse(p) >> text
+        )
+      )})
+    ),
+    texts("div.structured-project__steps_1-0 > ol > li > p")
+  )
+
+  /**
+    * Parses an ingredient quantity written as a plain number ("2", "0.5"),
+    * a simple fraction ("3/4") or a mixed fraction ("1 1/2").
+    *
+    * Surrounding whitespace is ignored. A fraction with a zero denominator is
+    * treated as unparseable rather than producing an infinite amount.
+    *
+    * @param fractionLine the quantity text to parse
+    * @return the parsed amount, or None when the text is not a recognized
+    *         quantity
+    */
+  private def _parseFraction(fractionLine: String): Option[Float] = {
+    val fractionPattern = raw"(\d+)/(\d+)".r
+    // The whole part and the fraction are separated by whitespace ("1 1/2").
+    // The separator used to be \w+, which cannot match a space, so mixed
+    // fractions never parsed.
+    val mixedFractionPattern = raw"(\d+)\s+(\d+)/(\d+)".r
+
+    // numerator / denominator, or None when the denominator is zero.
+    def ratio(numerator: String, denominator: String): Option[Float] =
+      Some(denominator.toFloat).filter(_ != 0f).map(numerator.toFloat / _)
+
+    fractionLine.trim match {
+      case fractionPattern(numerator, denominator) =>
+        ratio(numerator, denominator)
+      case mixedFractionPattern(whole, numerator, denominator) =>
+        ratio(numerator, denominator).map(whole.toFloat + _)
+      case plain => plain.toFloatOption
+    }
+  }
 
-  private def _parseIngredient(ingredientLine: String): (Float, MeasureUnit, String) = {
+  private def _parseIngredient(
+    ingredientLine: String
+  ): (Float, MeasureUnit, String) = {
     val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
     val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
 
@@ -184,7 +252,11 @@ object Parser {
       case numberPattern(amount, unit, rest) =>
         (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
       case fractionPattern(numerator, denominator, unit, rest) =>
-        (numerator.toFloat/denominator.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
+        (
+          numerator.toFloat/denominator.toFloat,
+          MeasureUnit.guessUnit(unit).getOrElse(Count),
+          rest
+        )
       case noUnitLine =>
         (1, Count, noUnitLine)
     }

+ 4 - 3
webClient/src/main/scala/com/weEat/models/RecipeVar.scala

@@ -135,7 +135,7 @@ case class RecipeVar(recipe: Option[RecipeNode])
     { _ => Nil},
     Some({ str: String =>
       USDAController.getFoodsSearch(str, Seq(
-        Foundation, Survey, SRLegacy
+        Branded, Foundation, SRLegacy
       ).map(_.toString))().map(_.foods.map(USDANodeNoId.fromSearchResult))
     }),
     templates = Some(Templates({(x: USDANodeNoId) => x.name}).copy(
@@ -202,7 +202,8 @@ case class RecipeVar(recipe: Option[RecipeNode])
       div(cls := "row",
         div(cls := "col-12 input-group",
           _ingredientInput(Signal.fromFuture(ing.food)) { (e) =>
-            e.selectable.map(_.data).foreach({ (node) => id.set(Ingredient.IngredientId.fromFoodNode(node)) })
+            e.selectable.map(_.data).foreach({ (node) =>
+              id.set(Ingredient.IngredientId.fromFoodNode(node)) })
           },
           amountIn,
           unitIn
@@ -383,7 +384,7 @@ case class RecipeVar(recipe: Option[RecipeNode])
             ul(
               listStyleType := "none",
               paddingLeft := "0",
-              children <-- _ingredients.signal.splitByIndex {
+              children <-- ingredients.splitByIndex {
                 case (idx, _, ingredientStream) =>
                   _presentFoodNode(idx)(ingredientStream)
               }

+ 1 - 1
webClient/src/main/scala/com/weEat/views/UsdaImporter.scala

@@ -60,7 +60,7 @@ object UsdaImporter extends View[Option[String]] {
     val searchBar: SearchBar[Seq[Signal[Option[Seq[SearchResultFood]]]]] =
       SearchBar((term) =>
         USDAController.getFoodsSearch(term, Seq(
-          Foundation, Survey, SRLegacy
+          Branded, Foundation, SRLegacy
         ).map(_.toString), pageSize = Some(SEARCH_PAGE_SIZE))().map {
           case SearchResult(criteria, n, cur, tot, baseList) =>
             Val(Some(baseList)) +: