Add recipe parser

This commit is contained in:
Tyler Perkins 2023-08-13 22:15:02 -04:00
parent 5c9897958b
commit 5b4a232cb6
3 changed files with 118 additions and 3 deletions

11
Dockerfile Normal file
View File

@ -0,0 +1,11 @@
# Python 3.9 base image for the FastAPI recipe service.
FROM python:3.9
# All subsequent paths are relative to /code.
WORKDIR /code
# Copy and install requirements first so the pip layer stays cached
# until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
# Application source (expects src/main.py defining `app`).
COPY ./src /code/src
# Serve the FastAPI app on all interfaces, port 80.
CMD ["uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "80"]

35
requirements.txt Normal file
View File

@ -0,0 +1,35 @@
annotated-types==0.5.0
anyio==3.7.1
beautifulsoup4==4.12.2
certifi==2023.7.22
charset-normalizer==3.2.0
click==8.1.6
envyaml==1.10.211231
exceptiongroup==1.1.2
extruct==0.16.0
fastapi==0.101.0
h11==0.14.0
html-text==0.5.2
html5lib==1.1
idna==3.4
isodate==0.6.1
jstyleson==0.0.2
lxml==4.9.3
mf2py==1.1.3
pydantic==2.1.1
pydantic_core==2.4.0
pyparsing==3.1.1
pyRdfa3==3.5.3
PyYAML==6.0.1
rdflib==7.0.0
recipe-scrapers==14.42.0
requests==2.31.0
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
typing_extensions==4.7.1
urllib3==2.0.4
uvicorn==0.23.2
w3lib==2.1.2
webencodings==0.5.1

View File

@ -1,11 +1,80 @@
from fastapi import APIRouter, Depends, HTTPException
from recipe_scrapers import scrape_html, scrape_me, WebsiteNotImplementedError, NoSchemaFoundInWildMode
from typing import List, Optional
from pydantic import BaseModel
import logging
# Router for all recipe-scraping endpoints, mounted under /recipe.
# Bug fix: the original passed `tags=` twice (a leftover from the diff),
# which is a SyntaxError (duplicate keyword argument); the newer value wins.
router = APIRouter(
    prefix="/recipe",
    tags=["Recipes", "Web scraping"],
    responses={404: {"description": "Not found"}}
)
class IngredientGroup(BaseModel):
    """One group of ingredients from a scraped recipe.

    Attributes:
        ingredients: Raw ingredient lines belonging to this group.
        purpose: Optional heading for the group (e.g. "For the sauce").
    """

    ingredients: List[str]
    # Fix for pydantic v2 (pinned 2.1.1): an Optional field with no default
    # is still *required*; default to None so groups without a stated
    # purpose validate instead of raising a ValidationError.
    purpose: Optional[str] = None
class Nutrients(BaseModel):
    """Nutritional information for a scraped recipe.

    All fields are plain strings as provided by the scraper — presumably
    the site-reported text including units (TODO confirm against
    recipe_scrapers output).
    """

    calories: str
    carbohydrateContent: str
    proteinContent: str
    fatContent: str
    saturatedFatContent: str
    cholesterolContent: str
    sodiumContent: str
    fiberContent: str
    sugarContent: str
    servingSize: str
class Recipe(BaseModel):
    """Structured recipe data returned by the /recipe endpoints.

    The field set mirrors the dict produced by recipe_scrapers'
    ``to_json()`` (see ``getRecipe``), so a scraped dict can be validated
    directly into this model.
    """

    # Fix for pydantic v2 (pinned 2.1.1): Optional fields with no default
    # are still required; many sites omit the author, so default to None.
    author: Optional[str] = None
    canonical_url: str
    category: str
    cook_time: int
    cuisine: str
    description: str
    host: str
    image: str
    ingredient_groups: List[IngredientGroup]
    ingredients: List[str]
    instructions: str
    instructions_list: List[str]
    language: str
    nutrients: Nutrients
    prep_time: int
    ratings: float
    site_name: str
    title: str
    total_time: int
    yields: str
@router.get("/")
async def test():
    """Smoke-test endpoint: confirms the recipe router is mounted."""
    greeting = "Hello world!"
    return greeting
# NOTE(review): this coroutine has no route decorator — confirm whether
# `@router.get("/json")` was intended or whether it is only a helper.
async def json(url: str):
    """Scrape *url* and return the result as a validated ``Recipe`` model.

    Propagates the HTTPExceptions raised by ``getRecipe`` (400 for an
    empty url or no recipe found, 500 for any other scraping failure).
    """
    recipe_dict = await getRecipe(url)
    # Fix: ``parse_obj`` is deprecated in pydantic v2 (pinned 2.1.1);
    # ``model_validate`` is its direct replacement.
    return Recipe.model_validate(recipe_dict)
@router.get("/md")
async def markdown(url: str):
    """Placeholder for a Markdown-rendered recipe; not implemented yet."""
    # TODO: render the scraped recipe as Markdown.
    return None
async def getRecipe(url: str) -> dict:
    """Scrape *url* in wild mode and return the recipe as a dict.

    Returns the dict produced by recipe_scrapers' ``to_json()`` (the
    original ``-> str`` annotation was wrong).

    Raises:
        HTTPException: 400 if *url* is empty or no recipe schema is found;
            500 for any other scraping failure.
    """
    if not url:
        # Bug fix: the original raised the misspelled name `HttpException`,
        # which would have been a NameError at runtime, masking the 400.
        raise HTTPException(status_code=400, detail="url cannot be empty")
    # (removed unused `ret = ""` local)
    try:
        # wild_mode attempts a schema.org-based parse on unsupported hosts.
        recipe = scrape_me(url, wild_mode=True)
        recipe_dict = recipe.to_json()
    except NoSchemaFoundInWildMode:
        raise HTTPException(status_code=400, detail="Failed to find a recipe on the site. We may have failed to fetch it, or it might really not be a recipe site")
    except Exception:
        # Best-effort boundary: any other scraper failure becomes a 500.
        raise HTTPException(status_code=500, detail="Failed to obtain recipe")
    logging.info("Recipe found")
    logging.info(recipe_dict)
    return recipe_dict