1
- from ragflow_sdk import RAGFlow , DataSet , Document , Chunk
1
+ from ragflow_sdk import RAGFlow
2
2
from common import HOST_ADDRESS
3
-
3
+ import pytest
4
4
5
5
def test_upload_document_with_success (get_api_key_fixture ):
6
6
API_KEY = get_api_key_fixture
@@ -48,7 +48,6 @@ def test_list_documents_in_dataset_with_success(get_api_key_fixture):
48
48
ds .list_documents (keywords = "test" , page = 1 , page_size = 12 )
49
49
50
50
51
-
52
51
def test_delete_documents_in_dataset_with_success (get_api_key_fixture ):
53
52
API_KEY = get_api_key_fixture
54
53
rag = RAGFlow (API_KEY , HOST_ADDRESS )
@@ -59,4 +58,109 @@ def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
59
58
docs = ds .upload_documents (document_infos )
60
59
ds .delete_documents ([docs [0 ].id ])
61
60
61
+ # Upload documents of different file types and parse each one with the general parse method.
62
def test_upload_and_parse_pdf_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.pdf`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_pdf_document")

    # Read the fixture file as raw bytes; the SDK receives it under "blob".
    with open("test_data/test.pdf", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.pdf", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
62
72
73
def test_upload_and_parse_docx_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.docx`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_docx_document")

    # Read the fixture file as raw bytes; the SDK receives it under "blob".
    with open("test_data/test.docx", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.docx", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
83
def test_upload_and_parse_excel_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.xlsx`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_excel_document")

    # Read the fixture file as raw bytes; the SDK receives it under "blob".
    with open("test_data/test.xlsx", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.xlsx", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
93
def test_upload_and_parse_ppt_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.ppt`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_ppt_document")

    # Read the fixture file as raw bytes; the SDK receives it under "blob".
    with open("test_data/test.ppt", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.ppt", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
103
def test_upload_and_parse_image_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.jpg`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_image_document")

    # Read the fixture file as raw bytes; the SDK receives it under "blob".
    with open("test_data/test.jpg", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.jpg", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
113
def test_upload_and_parse_txt_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.txt`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_txt_document")

    # Opened in binary mode even though it is a text file: the SDK gets bytes.
    with open("test_data/test.txt", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.txt", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
123
def test_upload_and_parse_md_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.md`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_md_document")

    # Opened in binary mode even though it is a text file: the SDK gets bytes.
    with open("test_data/test.md", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.md", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
133
+
134
def test_upload_and_parse_json_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.json`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_json_document")

    # Opened in binary mode even though it is a text file: the SDK gets bytes.
    with open("test_data/test.json", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.json", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
144
+
145
# NOTE(review): this test is skipped unconditionally and the skip reason is
# left empty in the source; the motivation is not visible here.
@pytest.mark.skip(reason="")
def test_upload_and_parse_eml_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.eml`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. Currently never executed because of the
    ``skip`` marker above.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_eml_document")

    # Read the fixture file as raw bytes; the SDK receives it under "blob".
    with open("test_data/test.eml", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.eml", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
156
+
157
def test_upload_and_parse_html_documents_with_general_parse_method(get_api_key_fixture):
    """Create a dataset, upload ``test_data/test.html`` and request async parsing.

    No parse/chunk method is configured explicitly in this test; the dataset
    is created with only a name. The test passes as long as no SDK call raises.
    """
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset(name="test_html_document")

    # Opened in binary mode even though it is a text file: the SDK gets bytes.
    with open("test_data/test.html", "rb") as handle:
        payload = handle.read()

    uploaded = dataset.upload_documents(
        [{"displayed_name": "test.html", "blob": payload}]
    )
    first_id = uploaded[0].id
    dataset.async_parse_documents([first_id])
0 commit comments