Compare commits

..

No commits in common. "master" and "s3-connect" have entirely different histories.

15 changed files with 35 additions and 461 deletions

View File

@ -1,28 +0,0 @@
---
# Drone CI pipeline: a Docker image step followed by an ntfy notification step.
kind: pipeline
name: default

steps:
  # Uses the plugins/docker image; credentials and target repo come from
  # Drone secrets. Restricted to the master branch by the step-level `when`.
  - name: docker
    image: plugins/docker
    settings:
      username:
        from_secret: dockerhub_username
      password:
        from_secret: dockerhub_password
      repo:
        from_secret: dockerhub_repo
      tags: latest
    when:
      branch:
        - master

  # Sends the build message to the drone-builds topic on the ntfy server.
  - name: notify
    image: clortox/drone-ntfy
    settings:
      URL: https://ntfy.clortox.com
      USERNAME: drone
      PASSWORD:
        from_secret: ntfy_password
      TOPIC: drone-builds
      MESSAGE: Meme API build finished!
      CLICK: https://drone.clortox.com/Infrastructure/Meme-service
      TITLE: Meme API build

1
.gitignore vendored
View File

@ -182,4 +182,3 @@ tags
[._]*.un~ [._]*.un~
apply_environment.sh apply_environment.sh
docker-compose.yml

View File

@ -1,9 +0,0 @@
# Container image for the meme service: installs the Python requirements
# shipped in ./src and starts app.py.
FROM python
WORKDIR /usr/src/app
COPY ./src ./
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
# Exec form: python is started directly instead of through `/bin/sh -c`,
# so it receives stop signals itself.
CMD ["python", "./app.py"]

View File

@ -1,18 +1,3 @@
# Meme-service # Meme-service
[![Build Status](https://drone.clortox.com/api/badges/Infrastructure/Meme-service/status.svg)](https://drone.clortox.com/Infrastructure/Meme-service)
A meme serving microservice, written using flask_restx A meme serving microservice, written using flask_restx
## Environment Variables
| Variable | Usage |
| -------- | ----- |
| S3_BUCKET | Bucket name |
| S3_URL | S3 server URL |
| S3_UN | S3 Username |
| S3_PW | S3 Password |
| S3_TLS | Use TLS? Boolean |
| CACHE_TTL | How long do we cache what's in the bucket? |
| PORT | Port to run on inside the container |
| DEBUG | Debug mode? Boolean |

View File

@ -1,21 +1,12 @@
aniso8601==9.0.1 aniso8601==9.0.1
attrs==22.2.0 attrs==22.2.0
certifi==2022.12.7
click==8.1.3 click==8.1.3
envyaml==1.10.211231
Flask==2.2.3 Flask==2.2.3
flask-restx==1.0.6 flask-restx==1.0.6
fuzzywuzzy==0.18.0
itsdangerous==2.1.2 itsdangerous==2.1.2
Jinja2==3.1.2 Jinja2==3.1.2
jsonschema==4.17.3 jsonschema==4.17.3
Levenshtein==0.20.9
MarkupSafe==2.1.2 MarkupSafe==2.1.2
minio==7.1.13
pyrsistent==0.19.3 pyrsistent==0.19.3
python-Levenshtein==0.20.9
pytz==2022.7.1 pytz==2022.7.1
PyYAML==6.0
rapidfuzz==2.13.7
urllib3==1.26.14
Werkzeug==2.2.3 Werkzeug==2.2.3

View File

@ -1,14 +1,10 @@
from flask_restx import Api from flask_restx import Api
from .search import api as searchNamespace from .search import api as searchNamespace
from .get import api as getNamespace
from .util import api as utilNamespace
api = Api( api = Api(
title='Memes', title='Search',
version=1.0, version=1.0,
description='A programatic interface to my meme collection' description='Searching the collection'
) )
api.add_namespace(searchNamespace) api.add_namespace(searchNamespace)
api.add_namespace(getNamespace)
api.add_namespace(utilNamespace)

View File

@ -1,12 +0,0 @@
import logging
import s3Client
def getClientSafely():
    """
    Return the client produced by s3Client.getClient(), or None on failure.

    Any exception raised while obtaining the client is logged at CRITICAL
    level and swallowed, so callers only have to check for None.
    """
    logging.debug("Getting a client safely...")
    try:
        return s3Client.getClient()
    except Exception as e:
        # exc_info=True keeps the traceback, which the message alone loses.
        logging.critical("Failed to retrieve a client : " + str(e), exc_info=True)
        return None

View File

@ -1,164 +0,0 @@
from flask_restx import Namespace, Resource, fields
from flask_restx import reqparse
from flask import make_response, abort, request, Response
from minio.commonconfig import Tags
from werkzeug.datastructures import FileStorage
from api.clientGetter import getClientSafely
import logging
import random
# Exported namespace
api = Namespace(
    'resource',
    description='Interact with the raw underlying files. This namespace does NOT speak json, just raw files',
)

# Field descriptions for an upload.
# NOTE(review): nothing in the visible code references this dict - confirm
# whether it is still needed.
uploadFields = {
    'name': fields.String(
        title='Name',
        description='File name of your meme',
        required=True,
        example='Funny.mp4',
    ),
    'uploader': fields.String(
        title='Uploader',
        description='Name of the user who uploaded the meme',
        required=True,
    ),
    'nsfw': fields.Boolean(
        title='NSFW',
        description='Is this NSFW/Spoilable?',
        default=False,
    ),
    'file': fields.String(
        title='File',
        description='File as Base64',
    ),
}

# Request parser for uploads: the file itself is read from the request's
# files, while name / uploader / nsfw are read from request headers.
uploadForm = api.parser()
uploadForm.add_argument('file', location='files', type=FileStorage, required=True)
uploadForm.add_argument('name', location='headers', type=str, required=True)
uploadForm.add_argument('uploader', location='headers', type=str, required=True)
uploadForm.add_argument('nsfw', location='headers', type=str, required=True)
@api.route('/exact/<string:file_name>')
@api.route('/<string:file_name>', doc={
    "description" : "Alias for /exact/{query}"
})
@api.doc(description="Interact with exact raw files.")
class getExactFile(Resource):
    """Serve the contents of the meme whose name matches the URL exactly."""

    @api.doc('get')
    @api.response(200, 'Sucess')
    @api.response(500, 'S3 Error')
    @api.response(404, 'Requested file not found')
    def get(self, file_name):
        """
        Respond with whatever client.getMeme(file_name) returns.

        Aborts with 500 when no S3 client is available and with 404 when
        file_name is not in the current meme list.
        """
        client = getClientSafely()
        if client is None:
            abort(500, "S3 failed to start")

        if file_name not in client.getCurrentMemeList():
            # 404 is the status documented on this method; 400 was sent before.
            abort(404, "Requested file '" + file_name + "' not found")

        return make_response(client.getMeme(file_name))
@api.route('/')
class addFile(Resource):
    """Upload a meme, tagging it with its uploader and NSFW flag."""

    @api.response(200, 'Sucess')
    @api.response(500, 'S3 Error')
    @api.response(400, 'Bad request')
    @api.expect(uploadForm)
    def post(self):
        """
        Store the uploaded file through client.addMeme().

        Returns a JSON body with "message" and "success" keys; the status is
        500 when the client reports that the meme was not stored. Aborts with
        500 when no S3 client is available.
        """
        client = getClientSafely()
        if client is None:
            abort(500, "S3 failed to start")

        args = uploadForm.parse_args()
        # Replaces the stray print() calls that wrote to stdout on every upload.
        logging.debug("Upload arguments : " + str(args))

        tags = Tags.new_object_tags()
        tags["uploader"] = args['uploader']
        tags["nsfw"] = str(args['nsfw'])

        stored = client.addMeme(fileContents=args['file'],
                                name=args['name'],
                                tags=tags)
        if not stored:
            return {"message" : "failure", "success" : False}, 500

        # The key was misspelt "sucess" here, so clients checking "success"
        # (the key used by the failure payload) never saw it on success.
        return {"message" : "success", "success" : True}
@api.route('/random')
@api.doc(description="Returns a random meme")
class getRandomFile(Resource):
    """Serve one meme picked uniformly at random from the current list."""

    @api.doc('get')
    @api.response(200, 'Sucess')
    @api.response(500, 'S3 Error')
    @api.response(404, 'No memes available')
    def get(self):
        """
        Respond with a random meme; its name is sent in the X-Meme-Name header.

        Aborts with 500 when no S3 client is available and with 404 when the
        meme list is empty.
        """
        client = getClientSafely()
        if client is None:
            abort(500, "S3 failed to start")

        memes = tuple(client.getCurrentMemeList())
        if not memes:
            # random.choice() raises IndexError on an empty sequence.
            abort(404, "No memes available")

        choice = random.choice(memes)
        response = make_response(client.getMeme(choice))
        response.headers['X-Meme-Name'] = choice
        return response
@api.route('/psuedorandom')
@api.doc(description="Returns a psuedorandom meme. Will not return the same meme for a set number of requests")
class getRandomFile(Resource):
    """
    Serve a random meme while avoiding the most recently served ones.

    NOTE(review): this class reuses the name of the /random resource class;
    the name is kept so that existing references keep working.
    """

    # Names served recently, oldest first. This is a class attribute that is
    # mutated in place, so the history is shared by all instances.
    cache = []
    # Maximum number of names remembered in `cache`.
    maxSize = 100

    @api.doc('get')
    @api.response(200, 'Sucess')
    @api.response(500, 'S3 Error')
    @api.response(404, 'No memes available')
    def get(self):
        """
        Respond with a meme that is not in the recent history.

        The name is sent in the X-Meme-Name header. Aborts with 500 when no
        S3 client is available and with 404 when the meme list is empty.
        """
        client = getClientSafely()
        if client is None:
            abort(500, "S3 failed to start")

        memes = client.getCurrentMemeList()
        if not memes:
            abort(404, "No memes available")

        # When every meme is in the history (fewer memes than maxSize), forget
        # the oldest entries until something is eligible. Re-rolling until a
        # miss, as before, never terminates in that situation.
        while True:
            candidates = [meme for meme in memes if meme not in self.cache]
            if candidates:
                break
            self.cache.pop(0)

        choice = random.choice(candidates)

        self.cache.append(choice)
        if len(self.cache) > self.maxSize:
            # Drop the oldest entry; pop() without an index removed the name
            # that had just been appended, so the history never rotated.
            self.cache.pop(0)

        logging.debug("Contents of cache : " + str(self.cache))

        response = make_response(client.getMeme(choice))
        response.headers['X-Meme-Name'] = choice
        return response
@api.route('/share/<string:file_name>')
@api.doc(description="Returns a share URL from the underlying bucket")
class getShareLink(Resource):
    """Return a share URL for a meme, as produced by the S3 client."""

    @api.response(200, 'Sucess')
    @api.response(500, 'S3 Error')
    @api.response(404, 'Requested file not found')
    def get(self, file_name):
        """
        Return {"url": ...} using client.getShareForMeme(file_name).

        Aborts with 500 when no S3 client is available and with 404 when
        file_name is not in the current meme list.
        """
        client = getClientSafely()
        if client is None:
            abort(500, "S3 failed to start")

        if file_name not in client.getCurrentMemeList():
            # 404 is the status documented on this method; 400 was sent before.
            abort(404, "Requested file '" + file_name + "' not found")

        return {
            "url": client.getShareForMeme(file_name),
        }

View File

@ -1,14 +1,21 @@
from flask_restx import Namespace, Resource, fields from flask_restx import Namespace, Resource, fields
from flask_restx import reqparse from flask_restx import reqparse
from api.clientGetter import getClientSafely import s3Client
from api.get import getExactFile as getApi
from flask import abort
import nlp
import logging import logging
# Exported namespace # Exported namespace
api = Namespace('search', description='Searching for memes') api = Namespace('search', description='Searching for memes')
def getClientSafely():
    """
    Return the client produced by s3Client.getClient(), or None on failure.

    Any exception raised while obtaining the client is logged at CRITICAL
    level and swallowed, so callers only have to check for None.
    """
    logging.debug("Getting a client safely...")
    try:
        return s3Client.getClient()
    except Exception as e:
        # exc_info=True keeps the traceback, which the message alone loses.
        logging.critical("Failed to retrieve a client : " + str(e), exc_info=True)
        return None
@api.route('/exact/<string:query>') @api.route('/exact/<string:query>')
@api.doc(params={ @api.doc(params={
@ -19,50 +26,14 @@ class exactSearch(Resource):
@api.response(200, 'Sucess') @api.response(200, 'Sucess')
@api.response(500, 'S3 Error') @api.response(500, 'S3 Error')
def get(self, query): def get(self, query):
logging.debug("Getting a client safely...")
client = getClientSafely() client = getClientSafely()
if client is None: if client is None:
abort(500, "S3 failed to start") return {
"message": "Error connecting to S3"
}, 500
if query in client.getCurrentMemeList(): if query in client.getCurrentMemeList():
return { return "nice"
"found" : True,
"url" : "/resource/exact/" + query,
"name" : query,
"tags" : client.getTagsOnMeme(query),
}
else: else:
return { "found" : False } return "boo"
@api.route('/close/<string:query>')
@api.route('/<string:query>', doc={
    "description" : "Alias for /close/{query}"
})
@api.doc(
    params={'query' : 'Search query to attempt to compare against'},
    description="Find a meme thats close using levenshtein distance",
)
class textualClose(Resource):
    """Fuzzy search over meme names, delegated to nlp.getCloseMemes()."""

    @api.doc('fuzzy search')
    @api.response(200, 'Sucess')
    @api.response(500, 'S3 Error')
    def get(self, query):
        """
        Return the close matches for `query` together with their count.

        Each result carries the meme name, its /resource/exact/ URL and the
        tags reported by the client. Aborts with 500 when no S3 client is
        available.
        """
        s3 = getClientSafely()
        if s3 is None:
            abort(500, "S3 failed to start")

        matches = nlp.getCloseMemes(s3.getCurrentMemeList(), query)

        results = [
            {
                "found" : True,
                "url" : "/resource/exact/" + name,
                "name" : name,
                "tags" : s3.getTagsOnMeme(name),
            }
            for name in matches
        ]

        return {
            "results" : results,
            "numberOfResults" : len(matches)
        }

View File

@ -1,20 +0,0 @@
from flask_restx import Namespace, Resource, fields
from flask_restx import reqparse
from flask import abort
from api.clientGetter import getClientSafely
import logging
# Exported namespace
api = Namespace('util', description='Misc Utilities')
@api.route('/count')
@api.doc(description="Get number of memes in store")
class getCount(Resource):
    """Report how many memes the client currently lists."""

    def get(self):
        """
        Return {"count": <size of the current meme list>} with status 200.

        Aborts with 500 when no S3 client is available.
        """
        s3 = getClientSafely()
        if s3 is None:
            abort(500, "S3 failed to start")

        total = len(s3.getCurrentMemeList())
        return {"count" : total}, 200

View File

@ -6,13 +6,12 @@ from api import api
import os import os
import logging import logging
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.INFO)
isDebug = bool(os.environ.get('DEBUG', False)) isDebug = True
port = int(os.environ.get('PORT', 5000))
app = Flask(__name__) app = Flask(__name__)
api.init_app(app) api.init_app(app)
app.run(debug=isDebug, host='0.0.0.0', port=port) app.run(debug=isDebug)

View File

@ -1,7 +0,0 @@
# Load the configuration file.
#
# The path is read from the CONFIG_PATH environment variable and falls back to
# 'config.yaml'. The loaded result is exposed as the module-level `config`.
import envyaml
import os
config = envyaml.EnvYAML(os.environ.get('CONFIG_PATH', 'config.yaml'))

View File

@ -1,6 +0,0 @@
---
# S3 connection settings.
# NOTE(review): the ${...} placeholders are presumably expanded from
# environment variables by the loader (EnvYAML) - confirm. They are quoted so
# the expanded values always load as strings, whatever characters they contain.
s3:
  url: s3.clortox.com
  username: '${S3_USERNAME}'
  password: '${S3_PASSWORD}'
  tls: true
  bucket: memes

View File

@ -1,32 +0,0 @@
from fuzzywuzzy import fuzz, process
from api.clientGetter import getClientSafely
import logging
def getCloseMemes(allPossibleMemes: set, query: str):
    """
    Return the meme names that best match `query`.

    Every name is scored with fuzz.partial_ratio(query, name). All names that
    score exactly 100 are returned; when none does, the single best-scoring
    name is returned. The result is an empty list when no name scores above
    zero, which includes an empty `allPossibleMemes`. Previously that case
    produced [''], a name that does not exist.

    Raises:
        TypeError: when allPossibleMemes is not a set or query is not a str.
            TypeError is a subclass of Exception, so existing handlers that
            catch Exception still apply.
    """
    if not isinstance(allPossibleMemes, set):
        raise TypeError("Expected set for allPossibleMemes")
    if not isinstance(query, str):
        raise TypeError("Expected str for query")

    topMeme = None
    topMemes = []
    topScore = 0

    for meme in allPossibleMemes:
        currentScore = fuzz.partial_ratio(query, meme)
        if currentScore > topScore:
            topMeme = meme
            topScore = currentScore
        if currentScore == 100:
            topMemes.append(meme)

    logging.info('Top memes for given query (' + query + ")")
    logging.info('topMemes: ' + str(topMemes))
    logging.info('topMeme : ' + (topMeme or ''))

    # Fall back to the best partial match only when there is one.
    if not topMemes and topMeme is not None:
        topMemes.append(topMeme)

    return topMemes

View File

@ -1,22 +1,19 @@
import logging import logging
import os import os
from minio.commonconfig import Tags
from minio import Minio from minio import Minio
from minio.commonconfig import Tags from minio.commonconfig import Tags
from minio.error import S3Error from minio.error import S3Error
from config import config
from datetime import datetime from datetime import datetime
from functools import lru_cache
S3_URL = "" S3_URL = ""
S3_UN = "" S3_UN = ""
S3_PW = "" S3_PW = ""
S3_TLS = True S3_TLS = True
S3_BUCKET = "" S3_BUCKET = ""
CACHE_TTL = 10
gclient = None gclient = None
@ -30,30 +27,30 @@ def getClient():
if gclient != None: if gclient != None:
return gclient return gclient
if "url" not in config["s3"]: if "S3_URL" not in os.environ:
raise Exception("S3_URL is not set!") raise Exception("S3_URL is not set!")
S3_URL = config["s3.url"] S3_URL = os.environ["S3_URL"]
logging.info("Using S3_URL : " + S3_URL ) logging.info("Using S3_URL : " + S3_URL )
if "username" not in config["s3"]: if "S3_UN" not in os.environ:
raise Exception("S3_UN is not set!") raise Exception("S3_UN is not set!")
S3_UN = config["s3.username"] S3_UN = os.environ["S3_UN"]
logging.info("Using S3_UN : " + S3_UN) logging.info("Using S3_UN : " + S3_UN)
if "password" not in config["s3"]: if "S3_PW" not in os.environ:
raise Exception("S3_PW is not set!") raise Exception("S3_PW is not set!")
S3_PW = config["s3.password"] S3_PW = os.environ["S3_PW"]
logging.info("Using S3_PW : " + S3_PW) logging.info("Using S3_PW : " + S3_PW)
if "bucket" not in config["s3"]: if "S3_BUCKET" not in os.environ:
raise Exception("S3_BUCKET is not set!") raise Exception("S3_BUCKET is not set!")
S3_BUCKET = config["s3.bucket"] S3_BUCKET = os.environ["S3_BUCKET"]
logging.info("Using S3_BUCKET : " + S3_BUCKET) logging.info("Using S3_BUCKET : " + S3_BUCKET)
# override defaults # override defaults
if "tls" in config["s3"]: if "S3_TLS" in os.environ:
S3_TLS = config["s3.tls"] S3_TLS = os.environ["S3_TLS"].lower() in ("yes", "true", "1", "t")
logging.info("Using S3_TLS : " + str(S3_TLS)) logging.info("Using S3_TLS : " + str(S3_TLS))
client = Minio(S3_URL, client = Minio(S3_URL,
@ -75,7 +72,6 @@ def getClient():
class Client: class Client:
allMemes = set() allMemes = set()
memesToMd5 = dict() memesToMd5 = dict()
memesToTags = dict()
lastCheckedAllMemes = datetime.strptime("2000-01-01 01:01:01", "%Y-%m-%d %H:%M:%S") lastCheckedAllMemes = datetime.strptime("2000-01-01 01:01:01", "%Y-%m-%d %H:%M:%S")
client = None client = None
@ -88,12 +84,8 @@ class Client:
self.client = client self.client = client
else: else:
raise Exception("Improper object passed for client!") raise Exception("Improper object passed for client!")
self.getCurrentMemeList(force=True)
def getCurrentMemeList(self, force=False): def getCurrentMemeList(self, force=False):
"""
Get a list of all memes in the bucket
"""
now = datetime.now() now = datetime.now()
if (now - self.lastCheckedAllMemes).seconds > 300 or force: if (now - self.lastCheckedAllMemes).seconds > 300 or force:
logging.info("Enough time has elapsed, refreshing meme cache...") logging.info("Enough time has elapsed, refreshing meme cache...")
@ -101,7 +93,6 @@ class Client:
self.allMemes.clear() self.allMemes.clear()
self.memesToMd5.clear() self.memesToMd5.clear()
self.memesToTags.clear()
for obj in self.client.list_objects(S3_BUCKET): for obj in self.client.list_objects(S3_BUCKET):
if not obj.is_dir: if not obj.is_dir:
@ -111,84 +102,4 @@ class Client:
return self.allMemes return self.allMemes
@lru_cache(maxsize=32)
def getMeme(self, memeName: str):
    """
    Return the contents of the meme with exactly the given name.

    The object is fetched from S3_BUCKET and read in full. Results are
    memoised by lru_cache for up to 32 (self, memeName) pairs.
    NOTE(review): a memoised result is not invalidated when the object
    changes in the bucket - confirm that is acceptable.

    Raises:
        Exception: when memeName is not a str, or is not in the current
            meme list.
    """
    if not isinstance(memeName, str):
        raise Exception("paramater memeName is of improper type, expected a str")

    if memeName not in self.getCurrentMemeList():
        raise Exception("Requested meme '" + memeName + "' not found")

    reply = self.client.get_object(bucket_name=S3_BUCKET,
                                   object_name=memeName)
    try:
        return reply.read()
    finally:
        # The response holds a pooled HTTP connection; it must be closed and
        # released once the body has been read or the connection leaks.
        reply.close()
        reply.release_conn()
def addMeme(self, fileContents, name: str, tags: Tags = None):
    """
    Upload a meme and record it in the in-memory indexes.

    The object is uploaded first. If its etag equals the etag recorded for a
    different name, the upload is removed again and False is returned.
    Otherwise the name, etag and tags are recorded and True is returned.

    Args:
        fileContents: data passed to put_object as `data` (length unknown,
            uploaded in 10 MiB parts).
        name: object name to store the meme under.
        tags: tags to attach; a fresh empty Tags object is created per call
            when omitted.
    """
    if tags is None:
        # A default built in the signature is created once and then shared by
        # every call (and stored in memesToTags), so build it here instead.
        tags = Tags.new_object_tags()

    result = self.client.put_object(bucket_name=S3_BUCKET,
                                    object_name=name,
                                    data=fileContents,
                                    length=-1,
                                    tags=tags,
                                    part_size=10*1024*1024)

    # memesToMd5 is written below as name -> etag, so compare against the
    # stored etags; `etag in self.memesToMd5` only looked at the names.
    # The entry for `name` itself is skipped so that re-uploading a meme under
    # its own name does not delete it.
    # NOTE(review): assumes the cache refresh also stores name -> etag - confirm.
    isDuplicate = any(md5 == result.etag and known != name
                      for known, md5 in self.memesToMd5.items())

    if isDuplicate:
        logging.info('Uploaded meme named ' + name + ' already exists')
        self.client.remove_object(bucket_name=S3_BUCKET,
                                  object_name=name)
        return False

    self.allMemes.add(name)
    self.memesToMd5[name] = result.etag
    self.memesToTags[name] = tags
    return True
def getTagsOnMeme(self, memeName: str):
    """
    Returns the S3 Tags object for a given meme

    Tags are served from self.memesToTags when present; otherwise they are
    fetched with get_object_tags and stored there. The former lru_cache
    decorator is dropped: it sat in front of memesToTags and kept returning
    old results after memesToTags was cleared or updated.

    Raises:
        Exception: when memeName is not a str, or is not in the current
            meme list.
    """
    if not isinstance(memeName, str):
        raise Exception("paramater memeName is of improper type, expected a str")

    if memeName in self.memesToTags:
        return self.memesToTags[memeName]

    if memeName not in self.getCurrentMemeList():
        raise Exception("Requested meme '" + memeName + "' not found")

    reply = self.client.get_object_tags(bucket_name=S3_BUCKET,
                                        object_name=memeName)
    self.memesToTags[memeName] = reply
    return reply
def getShareForMeme(self, memeName: str) -> str:
    """
    Return the presigned GET URL that the bucket issues for a meme.

    Raises:
        Exception: when memeName is not a str, or is not in the current
            meme list.
    """
    if not isinstance(memeName, str):
        raise Exception("paramater memeName is of improper type, expected a str")

    knownMemes = self.getCurrentMemeList()
    if memeName not in knownMemes:
        raise Exception("Requested meme '" + memeName + "' not found")

    return self.client.get_presigned_url(method="GET",
                                         bucket_name=S3_BUCKET,
                                         object_name=memeName)