Skip to content

Commit c7c8b38

Browse files
Feiueliuhua and liuhua authored
Add test for document (#3548)
### What problem does this PR solve?

Add tests for documents.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: liuhua <[email protected]>
1 parent 0ac6dc8 commit c7c8b38

21 files changed

+406
-31
lines changed

api/apps/sdk/doc.py

+2
Original file line number | Diff line number | Diff line change
@@ -115,6 +115,7 @@ def upload(dataset_id, tenant_id):
115115
return get_result(
116116
message="No file selected!", code=settings.RetCode.ARGUMENT_ERROR
117117
)
118+
'''
118119
# total size
119120
total_size = 0
120121
for file_obj in file_objs:
@@ -127,6 +128,7 @@ def upload(dataset_id, tenant_id):
127128
message=f"Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)",
128129
code=settings.RetCode.ARGUMENT_ERROR,
129130
)
131+
'''
130132
e, kb = KnowledgebaseService.get_by_id(dataset_id)
131133
if not e:
132134
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
File renamed without changes.
File renamed without changes.

sdk/python/test/t_document.py

+107-3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1-
from ragflow_sdk import RAGFlow, DataSet, Document, Chunk
1+
from ragflow_sdk import RAGFlow
22
from common import HOST_ADDRESS
3-
3+
import pytest
44

55
def test_upload_document_with_success(get_api_key_fixture):
66
API_KEY = get_api_key_fixture
@@ -48,7 +48,6 @@ def test_list_documents_in_dataset_with_success(get_api_key_fixture):
4848
ds.list_documents(keywords="test", page=1, page_size=12)
4949

5050

51-
5251
def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
5352
API_KEY = get_api_key_fixture
5453
rag = RAGFlow(API_KEY, HOST_ADDRESS)
@@ -59,4 +58,109 @@ def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
5958
docs = ds.upload_documents(document_infos)
6059
ds.delete_documents([docs[0].id])
6160

61+
# Upload and parse documents of different file types with the general parse method.
62+
def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
63+
API_KEY = get_api_key_fixture
64+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
65+
ds = rag.create_dataset(name="test_pdf_document")
66+
with open("test_data/test.pdf","rb") as file:
67+
blob=file.read()
68+
document_infos = [{"displayed_name": "test.pdf","blob": blob}]
69+
docs=ds.upload_documents(document_infos)
70+
doc = docs[0]
71+
ds.async_parse_documents([doc.id])
6272

73+
def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
74+
API_KEY = get_api_key_fixture
75+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
76+
ds = rag.create_dataset(name="test_docx_document")
77+
with open("test_data/test.docx","rb") as file:
78+
blob=file.read()
79+
document_infos = [{"displayed_name": "test.docx","blob": blob}]
80+
docs=ds.upload_documents(document_infos)
81+
doc = docs[0]
82+
ds.async_parse_documents([doc.id])
83+
def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
84+
API_KEY = get_api_key_fixture
85+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
86+
ds = rag.create_dataset(name="test_excel_document")
87+
with open("test_data/test.xlsx","rb") as file:
88+
blob=file.read()
89+
document_infos = [{"displayed_name": "test.xlsx","blob": blob}]
90+
docs=ds.upload_documents(document_infos)
91+
doc = docs[0]
92+
ds.async_parse_documents([doc.id])
93+
def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
94+
API_KEY = get_api_key_fixture
95+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
96+
ds = rag.create_dataset(name="test_ppt_document")
97+
with open("test_data/test.ppt","rb") as file:
98+
blob=file.read()
99+
document_infos = [{"displayed_name": "test.ppt","blob": blob}]
100+
docs=ds.upload_documents(document_infos)
101+
doc = docs[0]
102+
ds.async_parse_documents([doc.id])
103+
def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
104+
API_KEY = get_api_key_fixture
105+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
106+
ds = rag.create_dataset(name="test_image_document")
107+
with open("test_data/test.jpg","rb") as file:
108+
blob=file.read()
109+
document_infos = [{"displayed_name": "test.jpg","blob": blob}]
110+
docs=ds.upload_documents(document_infos)
111+
doc = docs[0]
112+
ds.async_parse_documents([doc.id])
113+
def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
114+
API_KEY = get_api_key_fixture
115+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
116+
ds = rag.create_dataset(name="test_txt_document")
117+
with open("test_data/test.txt","rb") as file:
118+
blob=file.read()
119+
document_infos = [{"displayed_name": "test.txt","blob": blob}]
120+
docs=ds.upload_documents(document_infos)
121+
doc = docs[0]
122+
ds.async_parse_documents([doc.id])
123+
def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
124+
API_KEY = get_api_key_fixture
125+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
126+
ds = rag.create_dataset(name="test_md_document")
127+
with open("test_data/test.md","rb") as file:
128+
blob=file.read()
129+
document_infos = [{"displayed_name": "test.md","blob": blob}]
130+
docs=ds.upload_documents(document_infos)
131+
doc = docs[0]
132+
ds.async_parse_documents([doc.id])
133+
134+
def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
135+
API_KEY = get_api_key_fixture
136+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
137+
ds = rag.create_dataset(name="test_json_document")
138+
with open("test_data/test.json","rb") as file:
139+
blob=file.read()
140+
document_infos = [{"displayed_name": "test.json","blob": blob}]
141+
docs=ds.upload_documents(document_infos)
142+
doc = docs[0]
143+
ds.async_parse_documents([doc.id])
144+
145+
@pytest.mark.skip(reason="")
146+
def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
147+
API_KEY = get_api_key_fixture
148+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
149+
ds = rag.create_dataset(name="test_eml_document")
150+
with open("test_data/test.eml","rb") as file:
151+
blob=file.read()
152+
document_infos = [{"displayed_name": "test.eml","blob": blob}]
153+
docs=ds.upload_documents(document_infos)
154+
doc = docs[0]
155+
ds.async_parse_documents([doc.id])
156+
157+
def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
158+
API_KEY = get_api_key_fixture
159+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
160+
ds = rag.create_dataset(name="test_html_document")
161+
with open("test_data/test.html","rb") as file:
162+
blob=file.read()
163+
document_infos = [{"displayed_name": "test.html","blob": blob}]
164+
docs=ds.upload_documents(document_infos)
165+
doc = docs[0]
166+
ds.async_parse_documents([doc.id])

sdk/python/test/test_data/.txt

-2
This file was deleted.

sdk/python/test/test_data/empty.txt

Whitespace-only changes.

sdk/python/test/test_data/lol.txt

-3
This file was deleted.

sdk/python/test/test_data/story.txt

-8
This file was deleted.

sdk/python/test/test_data/test.docx

18.7 KB
Binary file not shown.

0 commit comments

Comments
 (0)