Skip to content

Commit 013d7ee

Browse files
mfeurer and Louquinze authored
Louquinze development (#1578)
* create new text preprocessing cs
* create new text preprocessing cs
* set new defaults for text encoding
* set new defaults for text encoding
* set new defaults for text encoding
* Fix bug, rework tests

Co-authored-by: lukas <[email protected]>
1 parent f121ee4 commit 013d7ee

File tree

14 files changed

+584
-645
lines changed

14 files changed

+584
-645
lines changed

autosklearn/pipeline/components/data_preprocessing/feature_type.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,9 @@ def set_hyperparameters(
267267
sub_config_dict[new_name] = value
268268

269269
sub_configuration = Configuration(
270-
sub_configuration_space, values=sub_config_dict
270+
sub_configuration_space,
271+
values=sub_config_dict,
272+
allow_inactive_with_values=True,
271273
)
272274

273275
sub_init_params_dict: Optional[Dict[str, Any]] = None

autosklearn/pipeline/components/data_preprocessing/feature_type_categorical.py

-3
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,10 @@ class CategoricalPreprocessingPipeline(BasePipeline):
3333
3 - Minority coalescence: Assign category 1 to all categories whose occurrence
3434
don't sum-up to a certain minimum fraction
3535
4 - One hot encoding: usual sklearn one hot encoding
36-
3736
Parameters
3837
----------
3938
config : ConfigSpace.configuration_space.Configuration
4039
The configuration to evaluate.
41-
4240
random_state : Optional[int | RandomState]
4341
If int, random_state is the seed used by the random number generator;
4442
If RandomState instance, random_state is the random number generator;
@@ -101,7 +99,6 @@ def _get_hyperparameter_search_space(
10199
dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
102100
) -> ConfigurationSpace:
103101
"""Create the hyperparameter configuration space.
104-
105102
Returns
106103
-------
107104
cs : ConfigSpace.configuration_space.Configuration

autosklearn/pipeline/components/data_preprocessing/feature_type_numerical.py

-4
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,10 @@ class NumericalPreprocessingPipeline(BasePipeline):
2626
2 - VarianceThreshold: Removes low-variance features
2727
3 - Rescaling: rescale features according to a certain rule (e.g. normalization,
2828
standartization or min-max)
29-
3029
Parameters
3130
----------
3231
config : ConfigSpace.configuration_space.Configuration
3332
The configuration to evaluate.
34-
3533
random_state : Optional[int | RandomState]
3634
If int, random_state is the seed used by the random number generator;
3735
If RandomState instance, random_state is the random number generator;
@@ -94,10 +92,8 @@ def _get_hyperparameter_search_space(
9492
dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
9593
) -> ConfigurationSpace:
9694
"""Create the hyperparameter configuration space.
97-
9895
Parameters
9996
----------
100-
10197
Returns
10298
-------
10399
cs : ConfigSpace.configuration_space.Configuration

autosklearn/pipeline/components/data_preprocessing/text_encoding/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def get_hyperparameter_search_space(
6363
)
6464

6565
if default is None:
66-
defaults = ["bag_of_words_encoding"]
66+
defaults = ["tfidf_encoding"]
6767
for default_ in defaults:
6868
if default_ in available_preprocessors:
6969
default = default_

autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding.py

-134
This file was deleted.

autosklearn/pipeline/components/data_preprocessing/text_encoding/bag_of_word_encoding_distinct.py

-140
This file was deleted.

0 commit comments

Comments
 (0)