|
|
@@ -30,7 +30,9 @@ class ParserController @Inject()(
|
|
|
|
|
|
def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({ implicit request: AuthInfoRequest[String, Authorization] =>
|
|
|
val url = request.body
|
|
|
- _findParser(url).fold(Future.successful(NotFound(s"No parser available for $url."))) { (parser) =>
|
|
|
+ _findParser(url).fold(
|
|
|
+ Future.successful(NotFound(s"No parser available for $url."))
|
|
|
+ ) { (parser) =>
|
|
|
val doc = _browser.get(url)
|
|
|
val title = doc >> parser.titleExtractor
|
|
|
val servings = doc >> parser.servingExtractor
|
|
|
@@ -61,23 +63,27 @@ class ParserController @Inject()(
|
|
|
|
|
|
/**
 * Finds the site-specific [[Parser]] registered for the host of `url`.
 *
 * The host is compared case-insensitively and without a leading "www.".
 * Only the host name is used for the lookup; an explicit port or user-info
 * in the URL does not affect the result.
 *
 * @param url the recipe page URL as received from the client
 * @return the matching parser, or `None` when the URL cannot be parsed, has
 *         no host, or its host is not registered
 */
private def _findParser(url: String): Option[Parser] = {
  val parsersByHost = Map(
    "epicurious.com" -> Parser.epicurious,
    "mccormick.com" -> Parser.mccormick,
    "recipetineats.com" -> Parser.recipeTinEats,
    // NOTE(review): reuses the RecipeTinEats parser; presumably both sites
    // share the same recipe markup. Confirm.
    "mamalovestocook.com" -> Parser.recipeTinEats,
    "sallysbakingaddiction.com" -> Parser.sallysBakingAddiction,
    "seriouseats.com" -> Parser.seriousEats
  )

  for {
    // A malformed URL is treated as "no parser available" rather than
    // letting MalformedURLException escape to the caller.
    parsed <- scala.util.Try(new java.net.URL(url)).toOption
    // getHost (unlike getAuthority) excludes the port and user-info.
    // Option(...) guards against a null host.
    host <- Option(parsed.getHost).map(_.toLowerCase(java.util.Locale.ROOT))
    parser <- parsersByHost.get(host.stripPrefix("www."))
  } yield parser
}
|
|
|
|
|
|
- private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
|
|
|
+ private def _guessFoodFromStr(
|
|
|
+ foodLine: String
|
|
|
+ ): Future[Ingredient.IngredientId] = {
|
|
|
import gov.usda.nal.fdc.models.DataType._
|
|
|
usdaController.fdc.getFoodsSearch(foodLine
|
|
|
.filter(_ <= 0x7f)
|
|
|
.filterNot(_ == ':')
|
|
|
.filterNot(_ == '/'), Seq(
|
|
|
- Foundation, Survey, SRLegacy
|
|
|
+ Branded, Foundation, SRLegacy
|
|
|
), pageSize = Some(10))().flatMap({ (fdcResult) =>
|
|
|
Future.sequence(
|
|
|
fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
|
|
|
@@ -110,7 +116,9 @@ object Parser {
|
|
|
text(".main-title .count").map(_.toFloatOption),
|
|
|
Some(text(".prep_time .first_content")),
|
|
|
cookTimeExtractor = Some(text(".ingredients .first_content")),
|
|
|
- ingredientExtractor = texts(".recipe-about-list li").map(_.map(_parseIngredient _)),
|
|
|
+ ingredientExtractor = texts(".recipe-about-list li").map(
|
|
|
+ _.map(_parseIngredient _)
|
|
|
+ ),
|
|
|
texts(".instructions-main span.para")
|
|
|
)
|
|
|
|
|
|
@@ -120,7 +128,9 @@ object Parser {
|
|
|
.map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
|
|
|
None,
|
|
|
None,
|
|
|
- texts("""div[data-testid="IngredientList"] > div > div""").map(_.map(_parseIngredient _)),
|
|
|
+ texts("""div[data-testid="IngredientList"] > div > div""").map(
|
|
|
+ _.map(_parseIngredient _)
|
|
|
+ ),
|
|
|
texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
|
|
|
)
|
|
|
|
|
|
@@ -162,21 +172,79 @@ object Parser {
|
|
|
.map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
|
|
|
Some(text("span.tasty-recipes-prep-time")),
|
|
|
Some(text("span.tasty-recipes-cook-time")),
|
|
|
- elementList("div.tasty-recipes-ingredients-body > ul > li").map(_.map({(listItem) => (
|
|
|
- ((listItem >?> elementList("span"))
|
|
|
- .map(_.last)
|
|
|
- .fold(0.0f)((elm: Element) => (elm >?> attr("data-amount")).fold(0.0f)(_.toFloat))
|
|
|
- ),
|
|
|
- (listItem >?> elementList("span"))
|
|
|
- .map(_.last)
|
|
|
- .fold[MeasureUnit](Gram)((elm: Element) => (elm >?> attr("data-unit")).flatMap(MeasureUnit.guessUnit _).getOrElse(Count)),
|
|
|
- listItem >> text("strong")
|
|
|
- )})),
|
|
|
+ elementList("div.tasty-recipes-ingredients-body > ul > li").map(
|
|
|
+ _.map({(listItem) => (
|
|
|
+ ((listItem >?> elementList("span"))
|
|
|
+ .map(_.last)
|
|
|
+ .fold(0.0f)((elm: Element) =>
|
|
|
+ (elm >?> attr("data-amount"))
|
|
|
+ .fold(0.0f)(_.toFloat)
|
|
|
+ )
|
|
|
+ ),
|
|
|
+ (listItem >?> elementList("span"))
|
|
|
+ .map(_.last)
|
|
|
+ .fold[MeasureUnit](Gram)((elm: Element) =>
|
|
|
+ (elm >?> attr("data-unit"))
|
|
|
+ .flatMap(MeasureUnit.guessUnit _)
|
|
|
+ .getOrElse(Count)
|
|
|
+ ),
|
|
|
+ listItem >> text("strong")
|
|
|
+ )})
|
|
|
+ ),
|
|
|
texts("div.tasty-recipes-instructions-body > ol > li")
|
|
|
)
|
|
|
|
|
|
/**
 * Parser for seriouseats.com recipe pages.
 *
 * Arguments are passed positionally: title, servings, prep time, cook time,
 * ingredient extractor, instruction steps. Cook time is not extracted (None).
 * Servings are the first run of digits in the "recipe-serving" text; an
 * alternative "div.recipe-yield > span > span.meta-text__data" selector was
 * considered but is not used.
 */
val seriousEats = Parser(
  text("h2.recipe-decision-block__title"),
  text("div.recipe-serving > span > span.meta-text__data")
    .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
  Some(text("div.prep-time > span > span.meta-text__data")),
  None,
  elementList("ul.structured-ingredients__list > li > p").map(
    _.map { paragraph =>
      // All <span> children of this ingredient paragraph, if any.
      val spans = paragraph >?> elementList("span")

      // Text of the last span that carries the given attribute.
      def lastTextWith(attribute: String): Option[String] =
        spans.flatMap(
          _.filter((span) => (span >?> attr(attribute)).isDefined)
            .lastOption
            .map(_ >> text)
        )

      // Quantity falls back to 0 when missing or not parseable.
      val amount = lastTextWith("data-ingredient-quantity")
        .flatMap(_parseFraction _)
        .getOrElse(0.0f)

      // Unit falls back to Count when missing or not recognised.
      val measure = lastTextWith("data-ingredient-unit")
        .flatMap(MeasureUnit.guessUnit _)
        .getOrElse(Count)

      // Prefer the first span marked as the ingredient name; otherwise use
      // the text of the whole paragraph.
      val nameSource = spans
        .flatMap(
          _.filter((span) => (span >?> attr("data-ingredient-name")).isDefined)
            .headOption
        )
        .getOrElse(paragraph)

      (amount, measure, nameSource >> text)
    }
  ),
  texts("div.structured-project__steps_1-0 > ol > li > p")
)
|
|
|
+
|
|
|
/**
 * Converts the textual quantity of an ingredient into a number.
 *
 * Accepted forms are a simple fraction ("1/2"), a mixed fraction whose whole
 * part is separated from the fraction by whitespace ("1 1/2"), and anything
 * `String.toFloatOption` understands ("2", "0.5").
 *
 * @param fractionLine raw quantity text; surrounding whitespace is ignored
 * @return the parsed amount, or `None` when the text is not a quantity or the
 *         fraction has a zero denominator
 */
private def _parseFraction(fractionLine: String): Option[Float] = {
  val fractionPattern = raw"(\d+)\s*/\s*(\d+)".r
  // The whole part and the fraction are separated by whitespace ("1 1/2").
  // `\w+` must not be used as the separator: it does not match a space, so
  // every mixed fraction would fall through to toFloatOption and be rejected.
  val mixedFractionPattern = raw"(\d+)\s+(\d+)\s*/\s*(\d+)".r

  fractionLine.trim match {
    case fractionPattern(numerator, denominator) if denominator.toFloat != 0.0f =>
      Some(numerator.toFloat / denominator.toFloat)
    case mixedFractionPattern(whole, numerator, denominator) if denominator.toFloat != 0.0f =>
      Some(whole.toFloat + numerator.toFloat / denominator.toFloat)
    // Plain numbers land here, as do zero-denominator fractions, which
    // toFloatOption rejects.
    case other => other.toFloatOption
  }
}
|
|
|
|
|
|
- private def _parseIngredient(ingredientLine: String): (Float, MeasureUnit, String) = {
|
|
|
+ private def _parseIngredient(
|
|
|
+ ingredientLine: String
|
|
|
+ ): (Float, MeasureUnit, String) = {
|
|
|
val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
|
|
|
|
|
|
@@ -184,7 +252,11 @@ object Parser {
|
|
|
case numberPattern(amount, unit, rest) =>
|
|
|
(amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
|
|
|
case fractionPattern(numerator, denominator, unit, rest) =>
|
|
|
- (numerator.toFloat/denominator.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
|
|
|
+ (
|
|
|
+ numerator.toFloat/denominator.toFloat,
|
|
|
+ MeasureUnit.guessUnit(unit).getOrElse(Count),
|
|
|
+ rest
|
|
|
+ )
|
|
|
case noUnitLine =>
|
|
|
(1, Count, noUnitLine)
|
|
|
}
|