package com.weEat.controllers

import com.weEat.shared.models._
import javax.inject.{Inject,Singleton}
import play.api.libs.json._
import play.api.mvc._
import scala.concurrent.Future
import com.weEat.models.Authorization
import scalaoauth2.provider.{AuthInfoRequest,OAuth2ProviderActionBuilders}
import com.weEat.services.OAuth2Service
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.dsl.DSL._
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
//import net.ruippeixotog.scalascraper.dsl.DSL.Parse._
import net.ruippeixotog.scalascraper.model.Element
import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
import scala.util._

/** Scrapes a recipe page from a supported site and returns it as a
  * `RecipeNodeNoId` JSON document.
  *
  * Ingredient lines found on the page are resolved to food identifiers via
  * the USDA search client and the local food store.
  */
@Singleton
class ParserController @Inject()(
  val controllerComponents: ControllerComponents,
  oauth: OAuth2Service,
  usdaController: USDAController,
  foodController: FoodController
) extends BaseController with OAuth2ProviderActionBuilders {
  // Explicitly typed: un-annotated implicit definitions are deprecated.
  implicit val ec: scala.concurrent.ExecutionContextExecutor =
    scala.concurrent.ExecutionContext.global

  private val _browser = JsoupBrowser()

  /** Host name (without a leading "www.") -> parser for that site. */
  private val _parsersByHost: Map[String, Parser] = Map(
    ("epicurious.com" -> Parser.epicurious),
    ("mccormick.com" -> Parser.mccormick),
    ("recipetineats.com" -> Parser.recipeTinEats),
    ("mamalovestocook.com" -> Parser.recipeTinEats),
    ("sallysbakingaddiction.com" -> Parser.sallysBakingAddiction),
    ("seriouseats.com" -> Parser.seriousEats)
  )

  /** Parses the recipe found at the URL given as the plain-text request body.
    *
    * Responds with 404 when no parser is registered for the URL's host (or
    * the body is not a valid URL); otherwise with 200 and the recipe JSON.
    * Failures while fetching the page or resolving ingredients surface as a
    * failed `Future`.
    */
  def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({
    implicit request: AuthInfoRequest[String, Authorization] =>
      val url = request.body
      _findParser(url).fold(
        Future.successful(NotFound(s"No parser available for $url."))
      ) { (parser) =>
        // Fetching and parsing the page is blocking network I/O, so it is
        // run inside a Future and flagged as blocking for the pool.
        Future(scala.concurrent.blocking {
          val doc = _browser.get(url)
          (
            doc >> parser.titleExtractor,
            doc >> parser.servingExtractor,
            doc >> parser.ingredientExtractor
          )
        }).flatMap({ case (title, servings, ingredientLines) =>
          Future.sequence(ingredientLines.map({ case (amt, u, line) =>
            _guessFoodFromStr(line).map(Ingredient(_, amt, u))
          })).map((ingredients) => Ok(Json.toJson(RecipeNodeNoId(
            title,
            servings.getOrElse(1.0f),
            1.0f,
            UnitType.NUMBER,
            ingredients.toSeq,
            /* tflucke@[2023-10-26]: Do not pass along the instructions since
             * this could be a violation of the Recipe Author's copyright.
             */
            Nil,
            None,
            None,
            Some(url),
            None
          ))))
        })
      }
  })

  /** Looks up the parser registered for the host of `url`.
    *
    * @return `None` when `url` is malformed, has no host, or its host is not
    *         registered.
    */
  private def _findParser(url: String): Option[Parser] =
    for {
      parsed <- Try(new java.net.URL(url)).toOption
      // getHost (unlike getAuthority) excludes any port and user-info part.
      host <- Option(parsed.getHost).filter(_.nonEmpty)
      parser <- _parsersByHost.get(
        host.toLowerCase(java.util.Locale.ROOT).stripPrefix("www.")
      )
    } yield parser

  /** Resolves a free-text ingredient name to a food identifier.
    *
    * Searches the USDA client for the line (non-ASCII characters, ':' and
    * '/' are stripped from the query) and prefers the first search hit that
    * `foodController.getByFdcId` already knows; otherwise the first search
    * hit's USDA id is used.
    *
    * @return a failed future with `NoSuchElementException` when the search
    *         yields no foods at all.
    */
  private def _guessFoodFromStr(
    foodLine: String
  ): Future[Ingredient.IngredientId] = {
    import gov.usda.nal.fdc.models.DataType._
    val query = foodLine.filter((c) => c <= 0x7f && c != ':' && c != '/')
    usdaController.fdc.getFoodsSearch(query, Seq(
      Branded,
      Foundation,
      SRLegacy
    ), pageSize = Some(10))().flatMap[Ingredient.IngredientId]({ (fdcResult) =>
      Future.sequence(
        fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
      ).flatMap[Ingredient.IngredientId]({ (knownFoods) =>
        (knownFoods.flatten.headOption, fdcResult.foods.headOption) match {
          case (Some(foodNode), _) =>
            Future.successful(Ingredient.FoodNodeId(foodNode._id))
          case (None, Some(usdaFood)) =>
            Future.successful(Ingredient.USDAId(usdaFood.fdcId))
          case (None, None) =>
            Future.failed(new NoSuchElementException(
              s"No USDA food found for ingredient '$foodLine'"
            ))
        }
      })
    }).andThen({
      case Failure(x) => println(s"Food lookup failed: $x")
    })
  }
}

/** The set of extractors needed to scrape one recipe site.
  *
  * @param titleExtractor       extracts the recipe title
  * @param servingExtractor     extracts the number of servings, if present
  * @param prepTimeExtractor    optional extractor for the preparation time text
  * @param cookTimeExtractor    optional extractor for the cooking time text
  * @param ingredientExtractor  extracts (amount, unit, ingredient text) triples
  * @param instructionExtractor extracts the instruction steps
  */
case class Parser(
  titleExtractor: HtmlExtractor[Element, String],
  servingExtractor: HtmlExtractor[Element, Option[Float]],
  prepTimeExtractor: Option[HtmlExtractor[Element, String]],
  cookTimeExtractor: Option[HtmlExtractor[Element, String]],
  ingredientExtractor: HtmlExtractor[Element, Iterable[(Float, MeasureUnit, String)]],
  instructionExtractor: HtmlExtractor[Element, Iterable[String]],
)

/** Site-specific [[Parser]] instances and the text-parsing helpers they share. */
object Parser {
  // NOTE: the helper vals below must stay above the parser definitions; the
  // parsers read some of them while this object is being initialised.

  /** First run of digits in a text, e.g. "Serves 4 people". */
  private val ServingsNumber = "\\D*(\\d+).*".r
  /** First run of digits following a "Yield: " label. */
  private val YieldNumber = "Yield: \\D*(\\d+).*".r

  private val SimpleFraction = raw"(\d+)/(\d+)".r
  private val MixedFraction = raw"(\d+)\s+(\d+)/(\d+)".r

  private val WholeAmountLine = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  private val FractionAmountLine = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  private val MixedAmountLine = raw"(\d+)\s+(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r

  /** Unicode vulgar-fraction glyphs and their ASCII spelling. */
  private val VulgarFractions: Seq[(String, String)] = Seq(
    "\u00BC" -> "1/4",
    "\u00BD" -> "1/2",
    "\u00BE" -> "3/4",
    "\u2150" -> "1/7",
    "\u2151" -> "1/9",
    "\u2152" -> "1/10",
    "\u2153" -> "1/3",
    "\u2154" -> "2/3",
    "\u2155" -> "1/5",
    "\u2156" -> "2/5",
    "\u2157" -> "3/5",
    "\u2158" -> "4/5",
    "\u2159" -> "1/6",
    "\u215A" -> "5/6",
    "\u215B" -> "1/8",
    "\u215C" -> "3/8",
    "\u215D" -> "5/8",
    "\u215E" -> "7/8",
    "\u215F" -> "1/"
  )
  private val DigitBeforeVulgarFraction =
    "(\\d)([\u00BC-\u00BE\u2150-\u215F])".r

  val mccormick = Parser(
    text("h1"),
    // TODO use extractors
    text(".main-title .count").map(_.toFloatOption),
    Some(text(".prep_time .first_content")),
    // NOTE(review): this selector targets ".ingredients", not a cook-time
    // element - confirm against the site's markup.
    cookTimeExtractor = Some(text(".ingredients .first_content")),
    ingredientExtractor = texts(".recipe-about-list li").map(
      _.map(_parseIngredient _)
    ),
    instructionExtractor = texts(".instructions-main span.para")
  )

  val epicurious = Parser(
    text("h1"),
    text("""div[data-testid="IngredientList"] > p""")
      .map((s) => _firstNumber(YieldNumber, s)),
    None,
    None,
    texts("""div[data-testid="IngredientList"] > div > div""").map(
      _.map(_parseIngredient _)
    ),
    texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
  )

  val recipeTinEats = Parser(
    text("h2.wprm-recipe-name"),
    text("span.wprm-recipe-servings").map(_.toFloatOption),
    Some(text("span.wprm-recipe-prep_time-minutes")),
    Some(text("span.wprm-recipe-cook_time-minutes")),
    texts("li.wprm-recipe-ingredient")
      .map(_.map((line) => _expandVulgarFractions(line).trim))
      .map(_.map(_parseIngredient _)),
    texts("div.wprm-recipe-instruction-text")
  )

  val sallysBakingAddiction = Parser(
    text("h2.tasty-recipes-title"),
    text("span.tasty-recipes-yield")
      .map((s) => _firstNumber(ServingsNumber, s)),
    Some(text("span.tasty-recipes-prep-time")),
    Some(text("span.tasty-recipes-cook-time")),
    elementList("div.tasty-recipes-ingredients-body > ul > li").map(
      _.map({ (listItem) =>
        // Amount and unit are read from the data-* attributes of the last
        // <span> in the item; lastOption keeps a span-less item from throwing.
        val lastSpan = (listItem >?> elementList("span")).flatMap(_.lastOption)
        val amount: Float = lastSpan.fold(0.0f)((elm: Element) =>
          (elm >?> attr("data-amount"))
            .flatMap(_.toFloatOption)
            .getOrElse(0.0f)
        )
        val unit: MeasureUnit = lastSpan.fold[MeasureUnit](Gram)((elm: Element) =>
          (elm >?> attr("data-unit"))
            .flatMap(MeasureUnit.guessUnit _)
            .getOrElse(Count)
        )
        (amount, unit, listItem >> text("strong"))
      })
    ),
    texts("div.tasty-recipes-instructions-body > ol > li")
  )

  val seriousEats = Parser(
    text("h2.recipe-decision-block__title"),
    text("div.recipe-serving > span > span.meta-text__data")
      .map((s) => _firstNumber(ServingsNumber, s)),
    //text("div.recipe-yield > span > span.meta-text__data")
    Some(text("div.prep-time > span > span.meta-text__data")),
    None, //Some(text("span.tasty-recipes-cook-time")),
    elementList("ul.structured-ingredients__list > li > p").map(
      _.map({ (p) =>
        // Spans of this ingredient paragraph that carry the given attribute.
        def taggedSpans(attrName: String) =
          (p >?> elementList("span")).toList.flatten
            .filter((s) => (s >?> attr(attrName)).isDefined)
        (
          taggedSpans("data-ingredient-quantity")
            .lastOption
            .map(_ >> text)
            .flatMap(_parseFraction _)
            .getOrElse(0.0f),
          taggedSpans("data-ingredient-unit")
            .lastOption
            .map(_ >> text)
            .flatMap(MeasureUnit.guessUnit _)
            .getOrElse(Count),
          // Fall back to the whole paragraph when no name span is tagged.
          taggedSpans("data-ingredient-name")
            .headOption
            .getOrElse(p) >> text
        )
      })
    ),
    texts("div.structured-project__steps_1-0 > ol > li > p")
  )

  /** Returns group 1 of the first match of `pattern` in `source` as a Float. */
  private def _firstNumber(
    pattern: scala.util.matching.Regex,
    source: String
  ): Option[Float] =
    pattern.findFirstMatchIn(source).map(_.group(1).toFloat)

  /** Rewrites Unicode vulgar-fraction glyphs as ASCII fractions.
    *
    * A space is inserted between a digit and a directly following glyph so
    * that, for example, "1" followed by the one-half glyph becomes "1 1/2"
    * rather than "11/2".
    */
  private def _expandVulgarFractions(line: String): String = {
    val spaced = DigitBeforeVulgarFraction.replaceAllIn(line, "$1 $2")
    VulgarFractions.foldLeft(spaced)({ case (acc, (glyph, ascii)) =>
      acc.replace(glyph, ascii)
    })
  }

  /** Divides two digit strings; `None` when the denominator is zero. */
  private def _ratio(numerator: String, denominator: String): Option[Float] =
    Some(denominator.toFloat).filter(_ != 0.0f).map(numerator.toFloat / _)

  /** Parses a quantity written as "3/4", "1 1/2", a vulgar-fraction glyph, or
    * a plain number.
    *
    * @return `None` when the text is none of those forms or the fraction has
    *         a zero denominator.
    */
  private def _parseFraction(fractionLine: String): Option[Float] =
    _expandVulgarFractions(fractionLine).trim match {
      case MixedFraction(whole, numerator, denominator) =>
        _ratio(numerator, denominator).map(whole.toFloat + _)
      case SimpleFraction(numerator, denominator) =>
        _ratio(numerator, denominator)
      case plain => plain.toFloatOption
    }

  /** Splits an ingredient line such as "2 cups flour", "1/2 cup sugar" or
    * "1 1/2 cups milk" into (amount, unit, remaining text).
    *
    * An unrecognised unit word becomes `Count`. Lines that fit none of the
    * patterns (or whose fraction has a zero denominator) are returned whole
    * as `(1, Count, line)`.
    */
  private def _parseIngredient(
    ingredientLine: String
  ): (Float, MeasureUnit, String) =
    ingredientLine match {
      case WholeAmountLine(amount, unit, rest) =>
        (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
      case MixedAmountLine(whole, numerator, denominator, unit, rest)
          if denominator.toFloat != 0.0f =>
        (
          whole.toFloat + numerator.toFloat/denominator.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count),
          rest
        )
      case FractionAmountLine(numerator, denominator, unit, rest)
          if denominator.toFloat != 0.0f =>
        (
          numerator.toFloat/denominator.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count),
          rest
        )
      case noUnitLine => (1.0f, Count, noUnitLine)
    }
}