Skip to content

Commit c2b1869

Browse files
committed
Add unit tests for write
1 parent 8d1d0cb commit c2b1869

File tree

3 files changed

+69
-14
lines changed

3 files changed

+69
-14
lines changed

sdv/io/local/local.py

+14
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,11 @@ def read(self, folder_name, file_names=None, read_csv_parameters=None):
6969
file_names (list of str, optional):
7070
The names of CSV files to read. If None, all files ending with '.csv'
7171
in the folder are read.
72+
read_csv_parameters (dict):
73+
A dictionary with additional parameters to use when reading the CSVs.
74+
The keys are any of the parameter names of the pandas.read_csv function
75+
and the values are your inputs. Defaults to
76+
`{'parse_dates': False, 'low_memory': False, 'on_bad_lines': 'warn'}`.
7277
7378
Returns:
7479
dict:
@@ -77,6 +82,10 @@ def read(self, folder_name, file_names=None, read_csv_parameters=None):
7782
Raises:
7883
FileNotFoundError:
7984
If the specified files do not exist in the folder.
85+
86+
ValueError:
87+
If a provided parameter in `read_csv_parameters` is not supported by the
88+
`CSVHandler`.
8089
"""
8190
data = {}
8291
folder_path = Path(folder_name)
@@ -134,6 +143,11 @@ def write(
134143
'x': Write to new files, raising errors if existing files exist with the same name.
135144
'w': Write to new files, clearing any existing files that exist.
136145
'a': Append the new CSV rows to any existing files.
146+
147+
to_csv_parameters (dict):
148+
A dictionary with additional parameters to use when writing the CSVs.
149+
The keys are any of the parameter names of the pandas.DataFrame.to_csv function and
150+
the values are your inputs. Defaults to `{'index': False}`.
137151
"""
138152
folder_path = Path(folder_name)
139153
to_csv_parameters = to_csv_parameters or {}

tests/integration/io/local/test_local.py

+26
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,32 @@ def test_integration_write_and_read(self, tmpdir):
3333
pd.testing.assert_frame_equal(data['table1'], synthetic_data['table1'])
3434
pd.testing.assert_frame_equal(data['table2'], synthetic_data['table2'])
3535

36+
def test_integration_write_and_read_with_custom_parameters(self, tmpdir):
37+
"""Test end to end the write and read methods of ``CSVHandler`` with custom parameters."""
38+
# Prepare synthetic data
39+
synthetic_data = {
40+
'table1': pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']}),
41+
'table2': pd.DataFrame({'col3': [4, 5, 6], 'col4': ['d', 'e', 'f']}),
42+
}
43+
44+
# Write synthetic data to CSV files
45+
handler = CSVHandler()
46+
write_params = {'sep': ';', 'index': True}
47+
handler.write(synthetic_data, tmpdir, to_csv_parameters=write_params)
48+
49+
# Read data from CSV files
50+
read_params = {'nrows': 1, 'sep': ';', 'index_col': 'Unnamed: 0'}
51+
data = handler.read(tmpdir, read_csv_parameters=read_params)
52+
53+
# Check if data was read correctly
54+
assert len(data) == 2
55+
assert 'table1' in data
56+
assert 'table2' in data
57+
assert len(data['table1']) == 1
58+
assert len(data['table2']) == 1
59+
pd.testing.assert_frame_equal(data['table1'], synthetic_data['table1'].head(1))
60+
pd.testing.assert_frame_equal(data['table2'], synthetic_data['table2'].head(1))
61+
3662

3763
class TestExcelHandler:
3864
def test_integration_write_and_read(self, tmpdir):

tests/unit/io/local/test_local.py

+29-14
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,10 @@ def test_read_files_custom_parameters(self, tmpdir):
151151
"""Test the read method of CSVHandler class with custom read parameters."""
152152
# Setup
153153
file_path = Path(tmpdir)
154-
read_csv_parameters = {
154+
read_csv_parameters = {
155155
'encoding': 'latin-1',
156156
'nrows': 1,
157-
'escapechar': "\\",
157+
'escapechar': '\\',
158158
'quotechar': '"',
159159
'sep': ';',
160160
}
@@ -169,22 +169,18 @@ def test_read_files_custom_parameters(self, tmpdir):
169169

170170
# Run
171171
data = handler.read(
172-
tmpdir,
173-
file_names=['parent.csv'],
174-
read_csv_parameters=read_csv_parameters
172+
tmpdir, file_names=['parent.csv'], read_csv_parameters=read_csv_parameters
175173
)
176174

177175
# Assert
178176
assert 'parent' in data
179-
pd.testing.assert_frame_equal(
180-
data['parent'], pd.DataFrame({'col1': [1], 'col2': ['a']})
181-
)
177+
pd.testing.assert_frame_equal(data['parent'], pd.DataFrame({'col1': [1], 'col2': ['a']}))
182178

183179
def test_read_files_bad_parameters(self, tmpdir):
184180
"""Test the read method of CSVHandler class raises an error for unsupported read parameters."""
185181
# Setup
186182
file_path = Path(tmpdir)
187-
read_csv_parameters = {
183+
read_csv_parameters = {
188184
'filepath_or_buffer': 'myfile',
189185
'nrows': 1,
190186
'sep': ';',
@@ -205,11 +201,7 @@ def test_read_files_bad_parameters(self, tmpdir):
205201
'parameters instead.'
206202
)
207203
with pytest.raises(ValueError, match=error_msg):
208-
handler.read(
209-
tmpdir,
210-
file_names=['parent.csv'],
211-
read_csv_parameters=read_csv_parameters
212-
)
204+
handler.read(tmpdir, file_names=['parent.csv'], read_csv_parameters=read_csv_parameters)
213205

214206
def test_write(self, tmpdir):
215207
"""Test the write functionality of a CSVHandler."""
@@ -288,6 +280,29 @@ def test_write_file_exists_mode_is_w(self, tmpdir):
288280
expected_dataframe = pd.DataFrame({'col1': [1, 2, 3], 'col2': ['a', 'b', 'c']})
289281
pd.testing.assert_frame_equal(dataframe, expected_dataframe)
290282

283+
def test_write_file_with_custom_params(self, tmpdir):
284+
"""Test the write functionality of a CSVHandler with custom ``to_csv_parameters``."""
285+
# Setup
286+
table_one_mock = Mock()
287+
table_two_mock = Mock()
288+
289+
synthetic_data = {'table1': table_one_mock, 'table2': table_two_mock}
290+
291+
os.makedirs(tmpdir / 'synthetic_data')
292+
handler = CSVHandler()
293+
write_parameters = {'index': True, 'sep': ';'}
294+
295+
# Run
296+
handler.write(synthetic_data, tmpdir / 'synthetic_data', to_csv_parameters=write_parameters)
297+
298+
# Assert
299+
table_one_mock.to_csv.assert_called_once_with(
300+
tmpdir / 'synthetic_data' / 'table1.csv', index=True, sep=';', mode='x'
301+
)
302+
table_two_mock.to_csv.assert_called_once_with(
303+
tmpdir / 'synthetic_data' / 'table2.csv', index=True, sep=';', mode='x'
304+
)
305+
291306

292307
class TestExcelHandler:
293308
def test___init__(self):

0 commit comments

Comments
 (0)