// ParserController.scala
  1. package com.weEat.controllers
  2. import com.weEat.shared.models._
  3. import javax.inject.{Inject,Singleton}
  4. import play.api.libs.json._
  5. import play.api.mvc._
  6. import scala.concurrent.Future
  7. import com.weEat.models.Authorization
  8. import scalaoauth2.provider.{AuthInfoRequest,OAuth2ProviderActionBuilders}
  9. import com.weEat.services.OAuth2Service
  10. import net.ruippeixotog.scalascraper.browser.JsoupBrowser
  11. import net.ruippeixotog.scalascraper.dsl.DSL._
  12. import net.ruippeixotog.scalascraper.dsl.DSL.Extract._
  13. //import net.ruippeixotog.scalascraper.dsl.DSL.Parse._
  14. import net.ruippeixotog.scalascraper.model.{Document,Element}
  15. import net.ruippeixotog.scalascraper.scraper.HtmlExtractor
  16. import scala.util._
  17. import scala.concurrent.ExecutionContext
  18. @Singleton
  19. class ParserController @Inject()(
  20. val controllerComponents: ControllerComponents,
  21. oauth: OAuth2Service,
  22. usdaController: USDAController,
  23. foodController: FoodController
  24. ) extends BaseController
  25. with OAuth2ProviderActionBuilders {
  26. implicit val ec = scala.concurrent.ExecutionContext.global
  27. private val _browser = JsoupBrowser()
  28. def parseURL() = AuthorizedAction[Authorization](oauth).async(parse.text)({ implicit request: AuthInfoRequest[String, Authorization] =>
  29. val url = request.body
  30. val host = new java.net.URL(url).getAuthority()
  31. val hostNoWWW =
  32. if (host.startsWith("www.")) host.substring("www.".length) else host
  33. val doc = _browser.get(url)
  34. Parser(usdaController, foodController)(hostNoWWW, doc).transformWith {
  35. case Success(Some(parser)) => parser(usdaController, foodController)(doc, url)
  36. .map((food) => Ok(Json.toJson(food)))
  37. case Success(None) => Future.successful(NotFound(f"No parser found for host $hostNoWWW"))
  38. case Failure(e) => throw e
  39. }
  40. })
  41. }
  42. case class Parser(
  43. titleExtractor: HtmlExtractor[Element, String],
  44. servingExtractor: HtmlExtractor[Element, Option[Float]],
  45. prepTimeExtractor: Option[HtmlExtractor[Element, String]],
  46. cookTimeExtractor: Option[HtmlExtractor[Element, String]],
  47. ingredientExtractor: HtmlExtractor[Element, Iterable[(Float, MeasureUnit, String)]],
  48. instructionExtractor: HtmlExtractor[Element, Iterable[String]],
  49. )(usdaC: USDAController, foodC: FoodController) {
  50. implicit val ec = scala.concurrent.ExecutionContext.global
  51. def apply(doc: Document, url: String): Future[RecipeNodeNoId] = {
  52. Future({
  53. val title = doc >> titleExtractor
  54. val servings = (doc >?> servingExtractor).flatten
  55. val prepTime = prepTimeExtractor.flatMap(doc >?> _)
  56. val cookTime = cookTimeExtractor.flatMap(doc >?> _)
  57. val ingredients = doc >> ingredientExtractor
  58. val instructions = doc >> instructionExtractor
  59. Future.sequence(ingredients.map({
  60. case (amt, u, line) => _guessFoodFromStr(line).map(Ingredient(_, amt, u))
  61. }))
  62. .map((ingredients) => RecipeNodeNoId(
  63. title,
  64. servings.getOrElse(1.0f),
  65. 1.0f,
  66. UnitType.NUMBER,
  67. ingredients.toSeq,
  68. /* tflucke@[2023-10-26]: Do not pass along the instructions since this
  69. * could be a violation of the Recipe Author's copyright. */
  70. Nil, //instructions.toSeq,
  71. None,
  72. None,
  73. Some(url),
  74. None
  75. ))
  76. }).flatten
  77. }
  78. private def _guessFoodFromStr(
  79. foodLine: String
  80. ): Future[Ingredient.IngredientId] = {
  81. val foodLineFiltered = foodLine
  82. .filter(_ <= 0x7f)
  83. .filterNot(Set.from("!:/-").contains)
  84. searchFdcIndex(foodLineFiltered).transformWith {
  85. case Success(Some(ingredientId)) => Future.successful(ingredientId)
  86. case Success(None) => searchSelfIndex(foodLineFiltered)
  87. case Failure(e) => Future.failed(e)
  88. }
  89. }
  90. def searchFdcIndex(foodLine: String): Future[Option[Ingredient.IngredientId]] = {
  91. import gov.usda.nal.fdc.models.DataType._
  92. import gov.usda.nal.fdc.models.SearchResult
  93. usdaC.fdc.getFoodsSearch(foodLine, Seq(
  94. // Branded,
  95. Foundation, SRLegacy
  96. ), pageSize = Some(10))().flatMap({
  97. case SearchResult(_, _, _, _, Nil) => Future.successful(None)
  98. case SearchResult(_, _, _, _, foods) =>
  99. Future.sequence(
  100. foods.map((food) => foodC.getByFdcId(food.fdcId))
  101. ).map(_.flatten
  102. .headOption
  103. .fold[Ingredient.IngredientId](
  104. Ingredient.USDAId(foods.head.fdcId)
  105. )((foodNode) => Ingredient.FoodNodeId(foodNode._id))
  106. ).map(Some(_))
  107. }).recover {
  108. case e: com.tflucke.webroutes.HTTPException if e.statusCode == 500 =>
  109. println(s"USDA database failed to parse line: '$foodLine'")
  110. throw e
  111. }
  112. }
  113. def searchSelfIndex(foodLine: String): Future[Ingredient.IngredientId] = {
  114. foodC.findByName(foodLine)
  115. .transform {
  116. case Success(Nil) =>
  117. Failure(new NoSuchElementException(foodLine))
  118. case Success(foodNode::rest) =>
  119. Success(Ingredient.FoodNodeId(foodNode._id))
  120. case Success(_) => ???
  121. case Failure(e) => Failure(e)
  122. }
  123. }
  124. }
  125. object Parser {
  126. type ParserFn = (USDAController, FoodController) => Parser
  127. private val knownParsers = Map(
  128. ("epicurious.com" -> Parser.epicurious),
  129. ("mccormick.com" -> Parser.mccormick),
  130. ("recipetineats.com" -> Parser.recipeTinEats),
  131. ("mamalovestocook.com" -> Parser.recipeTinEats),
  132. ("soulfullymade.com" -> Parser.recipeTinEats),
  133. ("familycookierecipes.com" -> Parser.recipeTinEats),
  134. ("familyfreshmeals.com" -> Parser.recipeTinEats),
  135. ("handmadefarmhouse.com" -> Parser.recipeTinEats),
  136. ("tastesoflizzyt.com" -> Parser.recipeTinEats),
  137. ("omnivorescookbook.com" -> Parser.recipeTinEats),
  138. ("growforagecookferment.com" -> Parser.recipeTinEats),
  139. ("joyfoodsunshine.com" -> Parser.recipeTinEats),
  140. ("sallysbakingaddiction.com" -> Parser.tastyRecipes),
  141. ("darngoodveggies.com" -> Parser.tastyRecipes),
  142. ("pickledplum.com" -> Parser.tastyRecipes),
  143. ("iheartvegetables.com" -> Parser.tastyRecipes),
  144. ("seriouseats.com" -> Parser.seriousEats),
  145. ("greatist.com" -> Parser.greatist),
  146. ("dimitrasdishes.com" -> Parser.mvCreate),
  147. ("jif.com" -> Parser.jif),
  148. ("kingarthurbaking.com" -> Parser.kingArthurBaking),
  149. ("tasteasianfood.com" -> Parser.mvCreate),
  150. ("lovefood.com" -> Parser.loveFood)
  151. )
  152. private val frequentParsers = Seq.from(
  153. knownParsers.values.groupMapReduce(p => p)(_ => 1)((a, b) => a + b)
  154. ).sortBy({ case (_, c) => -c })
  155. .map(p => p._1)
  156. def apply(usdaC: USDAController, foodC: FoodController)(
  157. url: String,
  158. doc: Document
  159. )(implicit ec: ExecutionContext): Future[Option[ParserFn]] = {
  160. knownParsers.get(url).fold[Future[Option[ParserFn]]](Future.find(frequentParsers.map { (pfn) =>
  161. // convert the ParserFn to a Future[ParserFn] based on if it's a match
  162. pfn(usdaC, foodC)(doc, url)
  163. .filter(_.ingredients.size > 0)
  164. .map((_) => pfn)
  165. })(_ => true)) { (parser) =>
  166. Future.successful(Some(parser))
  167. }
  168. }
  169. def mccormick: ParserFn = Parser(
  170. text("h1"),
  171. // TODO use extractors
  172. text(".main-title .count").map(_.toFloatOption),
  173. Some(text(".prep_time .first_content")),
  174. cookTimeExtractor = Some(text(".ingredients .first_content")),
  175. ingredientExtractor = texts(".recipe-about-list li").map(
  176. _.map(_parseIngredient _)
  177. ),
  178. texts(".instructions-main span.para")
  179. ) _
  180. def epicurious: ParserFn = Parser(
  181. text("h1"),
  182. text("""div[data-testid="IngredientList"] > p""")
  183. .map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
  184. None,
  185. None,
  186. texts("""div[data-testid="IngredientList"] > div > div""").map(
  187. _.map(_parseIngredient _)
  188. ),
  189. texts("""div[data-testid="InstructionsWrapper"] > ol > li > p""")
  190. ) _
  191. def recipeTinEats: ParserFn = Parser(
  192. text("h2.wprm-recipe-name"),
  193. text("span.wprm-recipe-servings").map(_.toFloatOption),
  194. Some(text("span.wprm-recipe-prep_time-minutes")),
  195. Some(text("span.wprm-recipe-cook_time-minutes")),
  196. elementList("li.wprm-recipe-ingredient").map(_.map({ (li) => (
  197. (li >?> text("span.wprm-recipe-ingredient-amount")
  198. .map(_
  199. .replaceAll("\u00BD", "1/2")
  200. .replaceAll("\u00BC", "1/4")
  201. .replaceAll("\u00BE", "3/4")
  202. .replaceAll("\u2150", "1/7")
  203. .replaceAll("\u2151", "1/9")
  204. .replaceAll("\u2152", "1/10")
  205. .replaceAll("\u2153", "1/3")
  206. .replaceAll("\u2154", "2/3")
  207. .replaceAll("\u2155", "1/5")
  208. .replaceAll("\u2156", "2/5")
  209. .replaceAll("\u2157", "3/5")
  210. .replaceAll("\u2158", "4/5")
  211. .replaceAll("\u2159", "1/6")
  212. .replaceAll("\u215A", "5/6")
  213. .replaceAll("\u215B", "1/8")
  214. .replaceAll("\u215C", "3/8")
  215. .replaceAll("\u215D", "5/8")
  216. .replaceAll("\u215E", "7/8")
  217. .replaceAll("\u215F", "1/")
  218. ))
  219. .flatMap(_parseFraction _)
  220. .getOrElse(0.0f),
  221. (li >?> text("span.wprm-recipe-ingredient-unit"))
  222. .flatMap(MeasureUnit.guessUnit _)
  223. .getOrElse(Count),
  224. li >> text("span.wprm-recipe-ingredient-name")
  225. .map(_.replaceAll("\u00F1", "n"))
  226. ) })),
  227. texts("div.wprm-recipe-instruction-text")
  228. ) _
  229. def mvCreate: ParserFn = Parser(
  230. text("*.mv-create-title-primary"),
  231. text("span.mv-create-nutrition-serving-size").map(_.toFloatOption),
  232. Some(text("div.mv-create-time-prep > span.mv-time-minutes")),
  233. Some(text("div.mv-create-time-active > span.mv-time-minutes")),
  234. texts("div.mv-create-ingredients > ul > li").map(
  235. _.map(_
  236. .replace("and", "")
  237. .replaceAll("\u00BD", "1/2")
  238. .replaceAll("\u00BC", "1/4")
  239. .replaceAll("\u00BE", "3/4")
  240. .replaceAll("\u2150", "1/7")
  241. .replaceAll("\u2151", "1/9")
  242. .replaceAll("\u2152", "1/10")
  243. .replaceAll("\u2153", "1/3")
  244. .replaceAll("\u2154", "2/3")
  245. .replaceAll("\u2155", "1/5")
  246. .replaceAll("\u2156", "2/5")
  247. .replaceAll("\u2157", "3/5")
  248. .replaceAll("\u2158", "4/5")
  249. .replaceAll("\u2159", "1/6")
  250. .replaceAll("\u215A", "5/6")
  251. .replaceAll("\u215B", "1/8")
  252. .replaceAll("\u215C", "3/8")
  253. .replaceAll("\u215D", "5/8")
  254. .replaceAll("\u215E", "7/8")
  255. .replaceAll("\u215F", "1/")
  256. .replaceAll("\u00F1", "n")
  257. .trim
  258. ).map(_parseIngredient _)
  259. ),
  260. texts("div.mv-create-instructions > ol > li")
  261. ) _
  262. def tastyRecipes: ParserFn = Parser(
  263. text("h2.tasty-recipes-title"),
  264. text("span.tasty-recipes-yield")
  265. .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
  266. Some(text("span.tasty-recipes-prep-time")),
  267. Some(text("span.tasty-recipes-cook-time")),
  268. elementList("div.tasty-recipes-ingredients-body > ul > li").map(
  269. _.map({(listItem) => (
  270. ((listItem >?> elementList("span"))
  271. .flatMap(_.lastOption)
  272. .fold(0.0f)((elm: Element) =>
  273. (elm >?> attr("data-amount"))
  274. .fold(0.0f)(_.toFloat)
  275. )
  276. ),
  277. (listItem >?> elementList("span"))
  278. .flatMap(_.lastOption)
  279. .fold[MeasureUnit](Gram)((elm: Element) =>
  280. (elm >?> attr("data-unit"))
  281. .flatMap(MeasureUnit.guessUnit _)
  282. .getOrElse(Count)
  283. ),
  284. (listItem >?> text("strong"))
  285. .filterNot(_.contains("optional"))
  286. .getOrElse(listItem.ownText)
  287. )})
  288. ),
  289. texts("div.tasty-recipes-instructions-body > ol > li")
  290. ) _
  291. def seriousEats: ParserFn = Parser(
  292. text("h2.recipe-decision-block__title"),
  293. text("div.recipe-serving > span > span.meta-text__data")
  294. .map("\\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
  295. //text("div.recipe-yield > span > span.meta-text__data")
  296. Some(text("div.prep-time > span > span.meta-text__data")),
  297. None, //Some(text("span.tasty-recipes-cook-time")),
  298. elementList("ul.structured-ingredients__list > li > p").map(
  299. _.map({(p) => (
  300. ((p >?> elementList("span"))
  301. .flatMap(_
  302. .filter((s) => (s >?> attr("data-ingredient-quantity")).isDefined)
  303. .lastOption
  304. .map(_ >> text)
  305. ).flatMap(_parseFraction _)
  306. .getOrElse(0.0f)
  307. ),
  308. ((p >?> elementList("span"))
  309. .flatMap(_
  310. .filter((s) => (s >?> attr("data-ingredient-unit")).isDefined)
  311. .lastOption
  312. .map(_ >> text)
  313. ).flatMap(MeasureUnit.guessUnit _)
  314. .getOrElse(Count)
  315. ),
  316. ((p >?> elementList("span"))
  317. .flatMap(_
  318. .filter((s) => (s >?> attr("data-ingredient-name")).isDefined)
  319. .headOption
  320. ).getOrElse(p).ownText
  321. )
  322. )})
  323. ),
  324. texts("div.structured-project__steps_1-0 > ol > li > p")
  325. ) _
  326. def greatist: ParserFn = Parser(
  327. text("h1"),
  328. elementList("article.article-body > ul > li").map(
  329. _.filter((listItem) => (listItem >?> text("strong")) == Some("Yield"))
  330. .map(_ >> text)
  331. .head
  332. ).map("Yield: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
  333. // tflucke@[2023-11-28]: TODO They don't give passive, only Active + Total
  334. None,
  335. Some(
  336. elementList("article.article-body > ul > li").map(
  337. _.filter((listItem) => (listItem >?> text("strong")) == Some("Active"))
  338. .map(_ >> text)
  339. .head
  340. ).map("Active: \\D*(\\d+).*".r.findFirstMatchIn(_).fold("0")(_.group(1)))
  341. ),
  342. elementList("article.article-body > ul > li").map(_
  343. .filter((listItem) => (listItem >?> text("strong")) == None)
  344. .map(_ >> text)
  345. .map(_.replaceAll("\u00F1", "n"))
  346. .map(_parseIngredient _)
  347. ),
  348. texts("article.article-body > ol > li")
  349. ) _
  350. def jif: ParserFn = Parser(
  351. text("h1.recipe-name"),
  352. elementList("div.recipe-breakdown-step").map(
  353. _.filter((listItem) => (listItem >?> text("i.servings")).isDefined)
  354. .map(_ >> text("span.recipe-breakdown-detail"))
  355. .head
  356. ).map(_.toFloatOption),
  357. Some(elementList("div.recipe-breakdown-step").map(
  358. _.filter((listItem) => (listItem >?> text("i.prep")).isDefined)
  359. .map(_ >> text("span.recipe-breakdown-detail"))
  360. .head
  361. )),
  362. Some(elementList("div.recipe-breakdown-step").map(
  363. _.filter((listItem) => (listItem >?> text("i.cook")).isDefined)
  364. .map(_ >> text("span.recipe-breakdown-detail"))
  365. .head
  366. )),
  367. texts("div.recipe-ingredients > ul > li")
  368. .map(_.map(_
  369. .replaceAll("\u00BD", "1/2")
  370. .replaceAll("\u00BC", "1/4")
  371. .replaceAll("\u00BE", "3/4")
  372. .replaceAll("\u2150", "1/7")
  373. .replaceAll("\u2151", "1/9")
  374. .replaceAll("\u2152", "1/10")
  375. .replaceAll("\u2153", "1/3")
  376. .replaceAll("\u2154", "2/3")
  377. .replaceAll("\u2155", "1/5")
  378. .replaceAll("\u2156", "2/5")
  379. .replaceAll("\u2157", "3/5")
  380. .replaceAll("\u2158", "4/5")
  381. .replaceAll("\u2159", "1/6")
  382. .replaceAll("\u215A", "5/6")
  383. .replaceAll("\u215B", "1/8")
  384. .replaceAll("\u215C", "3/8")
  385. .replaceAll("\u215D", "5/8")
  386. .replaceAll("\u215E", "7/8")
  387. .replaceAll("\u215F", "1/")
  388. .replaceAll("\u00F1", "n")
  389. .trim
  390. ))
  391. .map(_.map(_parseIngredient _)),
  392. texts("div.recipe-directions > ul > li > p")
  393. ) _
  394. def kingArthurBaking: ParserFn = Parser(
  395. text("h1 > span"),
  396. text("div.stat__item--yield > span").map(_.toFloatOption),
  397. Some(text("div.stat__item--prep > span")),
  398. Some(text("div.stat__item--bake > span")),
  399. texts("div.ingredient-section > ul > li")
  400. .map(_.map(_
  401. .replaceAll("\u00BD", "1/2")
  402. .replaceAll("\u00BC", "1/4")
  403. .replaceAll("\u00BE", "3/4")
  404. .replaceAll("\u2150", "1/7")
  405. .replaceAll("\u2151", "1/9")
  406. .replaceAll("\u2152", "1/10")
  407. .replaceAll("\u2153", "1/3")
  408. .replaceAll("\u2154", "2/3")
  409. .replaceAll("\u2155", "1/5")
  410. .replaceAll("\u2156", "2/5")
  411. .replaceAll("\u2157", "3/5")
  412. .replaceAll("\u2158", "4/5")
  413. .replaceAll("\u2159", "1/6")
  414. .replaceAll("\u215A", "5/6")
  415. .replaceAll("\u215B", "1/8")
  416. .replaceAll("\u215C", "3/8")
  417. .replaceAll("\u215D", "5/8")
  418. .replaceAll("\u215E", "7/8")
  419. .replaceAll("\u215F", "1/")
  420. .replaceAll("\u00F1", "n")
  421. .trim
  422. ))
  423. .map(_.map(_parseIngredient _)),
  424. texts("div.field field--recipe-steps > ol > li > p")
  425. ) _
  426. def loveFood: ParserFn = Parser(
  427. text("h1.post__title"),
  428. elementList("div.layout__item.u-1/2-lap > ul > li").map(
  429. _.filter((listItem) => (listItem >?> text("strong")) == Some("Serves:"))
  430. .map(_ >> text)
  431. .head
  432. ).map("Serves: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1).toFloat)),
  433. Some(elementList("div.layout__item.u-1/2-lap > ul > li").map(
  434. _.filter((listItem) => (listItem >?> text("strong")) == Some("Preparation Time:"))
  435. .map(_ >> text)
  436. .head
  437. ).map("Preparation Time: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1)).getOrElse(""))),
  438. Some(elementList("div.layout__item.u-1/2-lap > ul > li").map(
  439. _.filter((listItem) => (listItem >?> text("strong")) == Some("Cooking Time:"))
  440. .map(_ >> text)
  441. .head
  442. ).map("Cooking Time: \\D*(\\d+).*".r.findFirstMatchIn(_).map(_.group(1)).getOrElse(""))),
  443. texts("ul[name='ingredients-metric'] > li")
  444. .map(_.map(_
  445. .replaceAll("\u00BD", "1/2")
  446. .replaceAll("\u00BC", "1/4")
  447. .replaceAll("\u00BE", "3/4")
  448. .replaceAll("\u2150", "1/7")
  449. .replaceAll("\u2151", "1/9")
  450. .replaceAll("\u2152", "1/10")
  451. .replaceAll("\u2153", "1/3")
  452. .replaceAll("\u2154", "2/3")
  453. .replaceAll("\u2155", "1/5")
  454. .replaceAll("\u2156", "2/5")
  455. .replaceAll("\u2157", "3/5")
  456. .replaceAll("\u2158", "4/5")
  457. .replaceAll("\u2159", "1/6")
  458. .replaceAll("\u215A", "5/6")
  459. .replaceAll("\u215B", "1/8")
  460. .replaceAll("\u215C", "3/8")
  461. .replaceAll("\u215D", "5/8")
  462. .replaceAll("\u215E", "7/8")
  463. .replaceAll("\u215F", "1/")
  464. .replaceAll("\u00F1", "n")
  465. .trim
  466. ))
  467. .map(_.map(_parseIngredient _)),
  468. texts("div.content__step-by-step > ol > li")
  469. ) _
  470. private def _parseFraction(fractionLine: String) = {
  471. val fractionPattern = raw"(\d+)/(\d+)[\d-_]*".r
  472. val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)[\d-_]*".r
  473. fractionLine match {
  474. case fractionPattern(numerator, denominator) =>
  475. Some(numerator.toFloat/denominator.toFloat)
  476. case mixedFractionPattern(whole, numerator, denominator) =>
  477. Some(whole.toFloat + numerator.toFloat/denominator.toFloat)
  478. case _ => fractionLine.toFloatOption
  479. }
  480. }
  481. private def _parseIngredient(
  482. ingredientLine: String
  483. ): (Float, MeasureUnit, String) = {
  484. val numberPattern = raw"(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  485. val fractionPattern = raw"(\d+)/(\d+)[\d-_]*\s(\w+)\s+(.+)".r
  486. val mixedFractionPattern = raw"(\d+)\w+(\d+)/(\d+)\s(\w+)\s+(.+)".r
  487. ingredientLine match {
  488. case mixedFractionPattern(whole, numerator, denominator, unit, rest) =>
  489. (
  490. whole.toFloat + numerator.toFloat/denominator.toFloat,
  491. MeasureUnit.guessUnit(unit).getOrElse(Count),
  492. rest
  493. )
  494. case fractionPattern(numerator, denominator, unit, rest) =>
  495. (
  496. numerator.toFloat/denominator.toFloat,
  497. MeasureUnit.guessUnit(unit).getOrElse(Count),
  498. rest
  499. )
  500. case numberPattern(amount, unit, rest) =>
  501. (amount.toFloat, MeasureUnit.guessUnit(unit).getOrElse(Count), rest)
  502. case noUnitLine =>
  503. (1, Count, noUnitLine)
  504. }
  505. }
  506. }