-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
152 lines (123 loc) · 4.84 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from flask import Flask, request, jsonify
import os
from config import (
UPLOAD_FOLDER,
ALLOWED_EXTENSIONS,
REDIS_URL,
QUERY_RESULT_LIMIT,
MAX_DOCUMENT_SIZE,
)
from tasks import process_document_task
import logging
from utils import FileHandler, WeaviateHelper, logger, IndexingHelper
# Flask application instance that every route in this module registers on.
app = Flask(__name__)

# Configure upload folder and the maximum accepted request body size.
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_DOCUMENT_SIZE

# Get Weaviate client. Fail fast at import time when it is unavailable.
# RuntimeError is a subclass of Exception, so existing broad handlers
# around the import of this module keep working.
client = WeaviateHelper.get_client()
if client is None:
    raise RuntimeError("Failed to initialize Weaviate client")

# Configure logging.
logging.basicConfig(level=logging.INFO)
# NOTE: this rebinds the `logger` name imported from `utils` above; from here
# on the module logs through its own `__name__` logger.
logger = logging.getLogger(__name__)
@app.before_request
def before_request():
    """Create the upload directory before a request is handled, if missing."""
    # Nothing to do when something already exists at the configured path.
    if os.path.exists(UPLOAD_FOLDER):
        return
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
@app.route('/ingest', methods=['POST'])
def ingest_document():
    """
    Ingest a new document for processing.

    Accepts multipart/form-data with a 'file' field. The upload is saved via
    ``FileHandler.secure_file_save`` and a ``process_document_task`` is queued
    for the saved path.

    Returns:
        A ``(JSON response, status code)`` tuple:
        200 with ``task_id`` once the task is queued;
        400 when the file part is missing, has no filename, or its type is
        not allowed;
        500 when saving or queueing fails.
    """
    logger.info("Ingesting document...")

    if 'file' not in request.files:
        logger.warning("No file part in request")
        return jsonify({'error': 'No file part'}), 400

    upload = request.files['file']

    # Falsy check covers an empty filename as well as a missing (None) one.
    if not upload.filename:
        logger.warning("No selected file")
        return jsonify({'error': 'No selected file'}), 400

    if not FileHandler.allowed_file(upload.filename, ALLOWED_EXTENSIONS):
        logger.warning(f"File type not allowed: {upload.filename}")
        return jsonify({'error': 'File type not allowed'}), 400

    try:
        # Save file
        file_path = FileHandler.secure_file_save(upload, UPLOAD_FOLDER)
        if not file_path:
            # Previously this branch failed silently; leave a trace in the log.
            logger.error("Failed to save file: %s", upload.filename)
            return jsonify({'error': 'Failed to save file'}), 500

        # Queue the document processing task
        task = process_document_task.delay(file_path)
        return jsonify({
            'status': 'success',
            'message': 'Document uploaded and queued for processing',
            'task_id': task.id
        }), 200
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error processing document: %s", e)
        return jsonify({'error': str(e)}), 500
@app.route('/query', methods=['POST'])
def query_document():
    """Query a document with a search query.

    Expects a JSON object body with the keys ``query`` and ``document_id``
    (both required) and ``limit`` (optional, defaults to
    ``QUERY_RESULT_LIMIT``).

    Returns:
        A ``(JSON response, status code)`` tuple:
        200 with the results from ``IndexingHelper.query_document``;
        400 when the body is missing, is not valid JSON, is not a JSON
        object, or lacks ``query`` / ``document_id``;
        404 when a ``ValueError`` is raised while querying;
        500 for any other error.
    """
    try:
        # silent=True makes Flask return None for a wrong content type or
        # malformed JSON instead of raising an HTTP error that the blanket
        # handler below would turn into a 500.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'No data provided'}), 400
        # A JSON array/string/number has no .get(); reject it as a client error.
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        query = data.get('query')
        document_id = data.get('document_id')
        limit = data.get('limit', QUERY_RESULT_LIMIT)

        if not query:
            return jsonify({'error': 'No query provided'}), 400
        if not document_id:
            return jsonify({'error': 'No document_id provided'}), 400

        indexing_helper = IndexingHelper(REDIS_URL)
        results = indexing_helper.query_document(query, document_id, limit)
        return jsonify(results), 200
    except ValueError as ve:
        return jsonify({'error': str(ve)}), 404
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error querying document: %s", e)
        return jsonify({'error': str(e)}), 500
@app.route('/status/<task_id>', methods=['GET'])
def get_task_status(task_id):
    """Get the status of a document processing task.

    Args:
        task_id: Identifier of the task, taken from the URL path.

    Returns:
        A JSON response (optionally with a status code):
        ``{'status': 'completed', ...}`` when the task succeeded and its
        result is a dict whose ``status`` is ``'success'``;
        ``{'status': 'failed', ...}`` with 500 when the task failed or its
        result has an unexpected shape;
        ``{'status': 'processing'}`` for every other state;
        ``{'error': ...}`` with 500 if the lookup itself raises.
    """
    try:
        task = process_document_task.AsyncResult(task_id)
        # Read the state once: each access may be a separate backend lookup,
        # so repeated reads could observe different states across branches.
        state = task.state

        if state == 'SUCCESS':
            result = task.result
            if isinstance(result, dict) and result.get('status') == 'success':
                return jsonify({
                    'status': 'completed',
                    'document_id': result.get('document_id'),
                    'message': 'Document processing completed successfully'
                })
            return jsonify({
                'status': 'failed',
                'error': 'Document processing failed - invalid result format'
            }), 500

        if state == 'FAILURE':
            return jsonify({
                'status': 'failed',
                'error': str(task.result)
            }), 500

        # PENDING, STARTED, RETRY, or other states
        return jsonify({'status': 'processing'})
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Error checking task status: %s", e)
        return jsonify({'error': str(e)}), 500
@app.route('/health', methods=['GET'])
def health_check():
    """Report whether the service can obtain a Weaviate client.

    Returns:
        ``{'status': 'healthy'}`` with 200 when a client is obtained,
        otherwise ``{'status': 'error', 'message': ...}`` with 500.
    """
    try:
        weaviate_client = WeaviateHelper.get_client()
        if weaviate_client is not None:
            return jsonify({'status': 'healthy'}), 200
        return jsonify({'status': 'error', 'message': 'Failed to connect to Weaviate'}), 500
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)}), 500
if __name__ == '__main__':
    # Debug mode enables the interactive debugger and must not be exposed in
    # production. It stays on by default to preserve the previous behaviour of
    # running this file directly; set FLASK_DEBUG=0 (or false/no/off) to
    # disable it.
    debug_setting = os.environ.get('FLASK_DEBUG', '1').strip().lower()
    app.run(debug=debug_setting not in {'', '0', 'false', 'no', 'off'})