FEAT: PDF/TEXT uploading and getting file contents done

This commit is contained in:
user86 2020-10-06 21:08:03 +05:30
parent 33bf7d5264
commit 7f7611f700
4 changed files with 42 additions and 30 deletions

43
app.py
View File

@ -2,7 +2,7 @@ import os
from flask import Flask, render_template, redirect, url_for from flask import Flask, render_template, redirect, url_for
from flask.globals import request from flask.globals import request
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from PyPDF2 import PdfFileReader from workers import pdf2text
# Constants # Constants
UPLOAD_FOLDER = './pdf/' UPLOAD_FOLDER = './pdf/'
@ -21,36 +21,31 @@ def index():
@ app.route('/quiz', methods=['GET', 'POST']) @ app.route('/quiz', methods=['GET', 'POST'])
def quiz(): def quiz():
""" Handle upload and conversion of file + other stuff """
UPLOAD_STATUS = False UPLOAD_STATUS = False
if request.method == 'POST':
try: # Make directory to store uploaded files, if not exists
uploaded_file = request.files['file']
# Make directory to store uploaded files
if not os.path.isdir('./pdf'): if not os.path.isdir('./pdf'):
os.mkdir('./pdf') os.mkdir('./pdf')
# Save uploaded file
uploaded_file.save(os.path.join(
app.config['UPLOAD_FOLDER'], secure_filename(uploaded_file.filename)))
UPLOAD_STATUS = True
# Identify file type and other stuff if request.method == 'POST':
uploaded_content = None try:
# Retrieve file from request
uploaded_file = request.files['file']
file_path = os.path.join(
app.config['UPLOAD_FOLDER'], secure_filename(uploaded_file.filename))
file_exten = uploaded_file.filename.rsplit('.', 1)[1].lower() file_exten = uploaded_file.filename.rsplit('.', 1)[1].lower()
if file_exten == 'pdf': # Save uploaded file
# TODO: Move PDF2Text conversion to another file uploaded_file.save(file_path)
print('PDF detected') # Get contents of file
with open(os.path.join( uploaded_content = pdf2text(file_path, file_exten)
app.config['UPLOAD_FOLDER'], secure_filename(uploaded_file.filename)), 'rb') as pdf_file: # File upload + convert success
pdf_reader = PdfFileReader(pdf_file) if uploaded_content is not None:
uploaded_content = pdf_reader.getPage(0).extractText() UPLOAD_STATUS = True
print(uploaded_content)
else:
# Read text file and store contents
pass
except Exception as e: except Exception as e:
print(e) print(e)
return render_template('quiz.html', uploaded=UPLOAD_STATUS, pdftext=uploaded_content) return render_template('quiz.html', uploaded=UPLOAD_STATUS)
if __name__ == "__main__": if __name__ == "__main__":

Binary file not shown.

View File

@ -11,10 +11,6 @@
{% if uploaded == true %} {% if uploaded == true %}
<h1>Your file was uploaded successfully</h1> <h1>Your file was uploaded successfully</h1>
<h1>Your quiz will appear here</h1> <h1>Your quiz will appear here</h1>
<code>Contents of your file</code>
<br />
<br />
{{ pdftext }}
{% else %} {% else %}
<h1>Could not upload file</h1> <h1>Could not upload file</h1>
{% endif %} {% endif %}

21
workers.py Normal file
View File

@ -0,0 +1,21 @@
from typing import Optional

from PyPDF2 import PdfFileReader
def pdf2text(file_path, file_exten) -> Optional[str]:
    """Return the text content of an uploaded file.

    Args:
        file_path: Path of the file on disk.
        file_exten: File extension without the leading dot. It is compared
            as-is against ``'pdf'`` and ``'txt'``, so callers are expected
            to pass it lower-cased.

    Returns:
        The extracted text for ``'pdf'`` and ``'txt'`` files, or ``None``
        when the extension is not one of those two.

    Errors raised by ``open`` or by PyPDF2 while reading the file are not
    caught here and propagate to the caller.
    """
    _content = None

    # Identify file type and get its contents
    if file_exten == 'pdf':
        with open(file_path, 'rb') as pdf_file:
            _pdf_reader = PdfFileReader(pdf_file)
            # Extract the text while the file is still open: the reader
            # pulls page data lazily from the underlying stream, so handing
            # the reader object back after the ``with`` exits is unusable.
            _content = ''.join(
                _pdf_reader.getPage(_page_no).extractText()
                for _page_no in range(_pdf_reader.getNumPages())
            )
        print('PDF operation done!')
    elif file_exten == 'txt':
        # Decode explicitly as UTF-8 so the result does not depend on the
        # host's default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as txt_file:
            _content = txt_file.read()
        print('TXT operation done!')

    return _content