package com.weEat.controllers

import com.weEat.shared.models._
import javax.inject.{Inject,Singleton}
import play.api.libs.json._
import play.api.mvc._
import scala.concurrent.Future
import com.weEat.models.Authorization
import scalaoauth2.provider.{AuthInfoRequest,OAuth2ProviderActionBuilders}
import com.weEat.services.OAuth2Service
import net.ruippeixotog.scalascraper.browser.JsoupBrowser
import net.ruippeixotog.scalascraper.dsl.DSL._
import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
//import net.ruippeixotog.scalascraper.dsl.DSL.Parse._
import net.ruippeixotog.scalascraper.model.Element
import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
import scala.util._

/** Controller that scrapes a recipe page from a supported site and returns it
  * as a JSON `RecipeNodeNoId`, resolving every ingredient line to a food id
  * through the USDA search and the local food store.
  */
@Singleton
class ParserController @Inject()(
  val controllerComponents: ControllerComponents,
  oauth: OAuth2Service,
  usdaController: USDAController,
  foodController: FoodController
) extends BaseController with OAuth2ProviderActionBuilders {

  implicit val ec: scala.concurrent.ExecutionContext =
    scala.concurrent.ExecutionContext.global

  private val _browser = JsoupBrowser()

  /** Supported recipe sites, keyed by lower-case host name without a leading "www.". */
  private val _parsersByHost: Map[String, Parser] = Map(
    "epicurious.com" -> Parser.epicurious,
    "mccormick.com" -> Parser.mccormick,
    "recipetineats.com" -> Parser.recipeTinEats,
    "mamalovestocook.com" -> Parser.recipeTinEats,
    "sallysbakingaddiction.com" -> Parser.sallysBakingAddiction
  )

  /** Parses the recipe found at the URL given as the plain-text request body.
    *
    * Responds with 404 when no parser is registered for the URL's host (this
    * includes URLs that cannot be parsed at all). Otherwise responds with 200
    * and the scraped recipe as JSON. Any failure while fetching the page or
    * resolving an ingredient fails the returned future.
    */
  def parseURL(): Action[String] =
    AuthorizedAction[Authorization](oauth).async(parse.text) {
      implicit request: AuthInfoRequest[String, Authorization] =>
        val url = request.body
        _findParser(url) match {
          case None =>
            Future.successful(NotFound(s"No parser available for $url."))
          case Some(parser) =>
            _scrape(url, parser).flatMap { case (title, servings, ingredientLines) =>
              Future.sequence(ingredientLines.map({ case (amt, u, line) =>
                _guessFoodFromStr(line).map(Ingredient(_, amt, u))
              })).map((ingredients) => Ok(Json.toJson(RecipeNodeNoId(
                title,
                servings.getOrElse(1.0f),
                1.0f,
                UnitType.NUMBER,
                ingredients.toSeq,
                /* tflucke@[2023-10-26]: Do not pass along the instructions since
                 * this could be a violation of the Recipe Author's copyright.
                 */
                Nil,
                None,
                None,
                Some(url),
                None
              ))))
            }
        }
    }

  /** Downloads `url` and extracts the title, serving count and raw ingredient
    * tuples with `parser`.
    *
    * The download is blocking network I/O, so it runs inside `blocking` on the
    * execution context instead of on the thread that handles the request. The
    * prep-time, cook-time and instruction extractors of `parser` are not
    * evaluated because their results are not part of the response.
    */
  private def _scrape(
    url: String,
    parser: Parser
  ): Future[(String, Option[Float], Iterable[(Float, MeasureUnit, String)])] =
    Future {
      scala.concurrent.blocking {
        val doc = _browser.get(url)
        val title = doc >> parser.titleExtractor
        val servings = doc >> parser.servingExtractor
        val ingredientLines = doc >> parser.ingredientExtractor
        (title, servings, ingredientLines)
      }
    }

  /** Looks up the parser registered for the host of `url`.
    *
    * @return `None` when the URL is malformed, has no host, or its host is not
    *         a supported site. The host is compared case-insensitively, with
    *         any port or user-info ignored and a leading "www." removed.
    */
  private def _findParser(url: String): Option[Parser] =
    for {
      parsed <- Try(new java.net.URL(url)).toOption
      // getHost may be null or empty (e.g. "file:/tmp/x"); both mean "no host".
      host <- Option(parsed.getHost)
        .map(_.toLowerCase(java.util.Locale.ROOT))
        .filter(_.nonEmpty)
      parser <- _parsersByHost.get(host.stripPrefix("www."))
    } yield parser

  /** Resolves a free-text ingredient description to a food id.
    *
    * The text is stripped of non-ASCII characters, ':' and '/', then used as a
    * USDA search query (at most 10 results). The first search result that
    * `foodController.getByFdcId` knows about wins and yields a `FoodNodeId`;
    * otherwise the first search result is returned as a `USDAId`.
    *
    * @return a future that fails with `NoSuchElementException` when the search
    *         returns no foods; every failure is also printed to stdout.
    */
  private def _guessFoodFromStr(foodLine: String): Future[Ingredient.IngredientId] = {
    import gov.usda.nal.fdc.models.DataType._
    val query = foodLine.filter((c) => c <= 0x7f && c != ':' && c != '/')
    usdaController.fdc.getFoodsSearch(
      query,
      Seq(Foundation, Survey, SRLegacy),
      pageSize = Some(10)
    )().flatMap({ (fdcResult) =>
      Future.sequence(
        fdcResult.foods.map((food) => foodController.getByFdcId(food.fdcId))
      ).flatMap({ (localMatches) =>
        val resolved: Option[Ingredient.IngredientId] =
          localMatches.flatten.headOption
            .map[Ingredient.IngredientId]((foodNode) => Ingredient.FoodNodeId(foodNode._id))
            .orElse(fdcResult.foods.headOption.map((food) => Ingredient.USDAId(food.fdcId)))
        resolved match {
          case Some(id) =>
            Future.successful[Ingredient.IngredientId](id)
          case None =>
            // An empty result list used to surface as "head of empty list".
            Future.failed[Ingredient.IngredientId](
              new NoSuchElementException(s"No USDA food found for '$query'.")
            )
        }
      })
    }).andThen({
      case Failure(x) => println(s"Food lookup failed: $x")
    })
  }
}

/** Set of HTML extractors describing how to scrape a recipe from one site.
  *
  * @param titleExtractor extracts the recipe title
  * @param servingExtractor extracts the number of servings, if it can be read
  * @param prepTimeExtractor optional extractor for the preparation time text
  * @param cookTimeExtractor optional extractor for the cooking time text
  * @param ingredientExtractor extracts (amount, unit, description) per ingredient
  * @param instructionExtractor extracts the instruction steps
  */
final case class Parser(
  titleExtractor: HtmlExtractor[Element, String],
  servingExtractor: HtmlExtractor[Element, Option[Float]],
  prepTimeExtractor: Option[HtmlExtractor[Element, String]],
  cookTimeExtractor: Option[HtmlExtractor[Element, String]],
  ingredientExtractor: HtmlExtractor[Element, Iterable[(Float, MeasureUnit, String)]],
  instructionExtractor: HtmlExtractor[Element, Iterable[String]]
)

object Parser {

  /** Unicode vulgar-fraction glyphs and their ASCII spelling. */
  private val _vulgarFractions: Seq[(String, String)] = Seq(
    "\u00BD" -> "1/2",
    "\u00BC" -> "1/4",
    "\u00BE" -> "3/4",
    "\u2150" -> "1/7",
    "\u2151" -> "1/9",
    "\u2152" -> "1/10",
    "\u2153" -> "1/3",
    "\u2154" -> "2/3",
    "\u2155" -> "1/5",
    "\u2156" -> "2/5",
    "\u2157" -> "3/5",
    "\u2158" -> "4/5",
    "\u2159" -> "1/6",
    "\u215A" -> "5/6",
    "\u215B" -> "1/8",
    "\u215C" -> "3/8",
    "\u215D" -> "5/8",
    "\u215E" -> "7/8",
    "\u215F" -> "1/"
  )

  /** A digit immediately followed by a vulgar-fraction glyph, e.g. "1½". */
  private val _digitThenVulgarFraction = "(\\d)([\u00BC-\u00BE\u2150-\u215F])".r

  private val _epicuriousYield = "Yield: \\D*(\\d+).*".r
  private val _firstNumber = "\\D*(\\d+).*".r

  private val _mixedNumberPattern = raw"(\d+)\s+(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  private val _numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  private val _fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r

  val mccormick: Parser = Parser(
    titleExtractor = text("h1"),
    // TODO use extractors
    servingExtractor = text(".main-title .count").map(_.toFloatOption),
    prepTimeExtractor = Some(text(".prep_time .first_content")),
    // NOTE(review): ".ingredients" is an unexpected place for a cook time;
    // confirm this selector against the site's markup.
    cookTimeExtractor = Some(text(".ingredients .first_content")),
    ingredientExtractor = texts(".recipe-about-list li").map(_.map(_parseIngredient _)),
    instructionExtractor = texts(".instructions-main span.para")
  )

  val epicurious: Parser = Parser(
    titleExtractor = text("h1"),
    servingExtractor = text("""div[data-testid="IngredientList"] > p""")
      .map(_epicuriousYield.findFirstMatchIn(_).map(_.group(1).toFloat)),
    prepTimeExtractor = None,
    cookTimeExtractor = None,
    ingredientExtractor = texts("""div[data-testid="IngredientList"] > div > div""")
      .map(_.map(_parseIngredient _)),
    instructionExtractor = texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
  )

  val recipeTinEats: Parser = Parser(
    titleExtractor = text("h2.wprm-recipe-name"),
    servingExtractor = text("span.wprm-recipe-servings").map(_.toFloatOption),
    prepTimeExtractor = Some(text("span.wprm-recipe-prep_time-minutes")),
    cookTimeExtractor = Some(text("span.wprm-recipe-cook_time-minutes")),
    // Vulgar fractions are expanded inside _parseIngredient.
    ingredientExtractor = texts("li.wprm-recipe-ingredient").map(_.map(_parseIngredient _)),
    instructionExtractor = texts("div.wprm-recipe-instruction-text")
  )

  val sallysBakingAddiction: Parser = Parser(
    titleExtractor = text("h2.tasty-recipes-title"),
    servingExtractor = text("span.tasty-recipes-yield")
      .map(_firstNumber.findFirstMatchIn(_).map(_.group(1).toFloat)),
    prepTimeExtractor = Some(text("span.tasty-recipes-prep-time")),
    cookTimeExtractor = Some(text("span.tasty-recipes-cook-time")),
    ingredientExtractor = elementList("div.tasty-recipes-ingredients-body > ul > li")
      .map(_.map(_parseTastyIngredient _)),
    instructionExtractor = texts("div.tasty-recipes-instructions-body > ol > li")
  )

  /** Reads one ingredient `<li>` of the "tasty-recipes" markup.
    *
    * Amount and unit come from the `data-amount` / `data-unit` attributes of
    * the last `<span>` in the item; the description is the `<strong>` text.
    * Without any span the result is `0` `Gram`; with a span whose attribute is
    * missing or unusable the amount is `0` and the unit is `Count`.
    */
  private def _parseTastyIngredient(listItem: Element): (Float, MeasureUnit, String) = {
    // lastOption: an item without spans must not throw.
    val lastSpan: Option[Element] =
      (listItem >?> elementList("span")).flatMap(_.lastOption)
    val amount: Float = lastSpan.fold(0.0f)((span: Element) =>
      (span >?> attr("data-amount")).flatMap(_.toFloatOption).getOrElse(0.0f))
    val unit: MeasureUnit = lastSpan.fold[MeasureUnit](Gram)((span: Element) =>
      (span >?> attr("data-unit")).flatMap(MeasureUnit.guessUnit _).getOrElse(Count))
    (amount, unit, listItem >> text("strong"))
  }

  /** Replaces vulgar-fraction glyphs with ASCII fractions and trims the line.
    *
    * A glyph that directly follows a digit is separated from it first, so
    * "1½" becomes "1 1/2" rather than "11/2".
    */
  private def _normalizeIngredientLine(line: String): String = {
    val spaced = _digitThenVulgarFraction.replaceAllIn(line, "$1 $2")
    val expanded = _vulgarFractions.foldLeft(spaced) {
      case (acc, (glyph, ascii)) => acc.replace(glyph, ascii)
    }
    expanded.trim
  }

  /** Splits an ingredient line into (amount, unit, description).
    *
    * Recognised shapes, after fraction normalisation:
    *  - "1 1/2 cups flour" (mixed number)
    *  - "2 cups flour" / "2-3 cups flour" (only the first number is used)
    *  - "1/2 cup flour" (fraction)
    *
    * Unknown unit words map to `Count`. Any other line, including a fraction
    * with a zero denominator, is returned whole as `(1, Count, line)`.
    */
  private def _parseIngredient(ingredientLine: String): (Float, MeasureUnit, String) = {
    val normalized = _normalizeIngredientLine(ingredientLine)
    normalized match {
      case _mixedNumberPattern(whole, numerator, denominator, unit, rest)
          if denominator.toFloat != 0.0f =>
        (
          whole.toFloat + numerator.toFloat / denominator.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count),
          rest
        )
      case _numberPattern(amount, unit, rest) =>
        (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
      case _fractionPattern(numerator, denominator, unit, rest)
          if denominator.toFloat != 0.0f =>
        (
          numerator.toFloat / denominator.toFloat,
          MeasureUnit.guessUnit(unit).getOrElse(Count),
          rest
        )
      case noUnitLine =>
        (1.0f, Count, noUnitLine)
    }
  }
}