Skip to content

Commit 242a409

Browse files
authored
Spelling (#464)
* spelling: a missing value is
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: all
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: big-endian
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: cannot
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: categorical
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: dissertation
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: endianness
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: feature
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: handle
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: label
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: little-endian
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: occurs
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: preexisting
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: scikit
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: sklearn
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: that
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: unknown
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: used
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* spelling: whether or not
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
* fix sentence
  Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
---------
Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
1 parent 2455467 commit 242a409

File tree

17 files changed

+30
-30
lines changed

17 files changed

+30
-30
lines changed

category_encoders/base_contrast_encoder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def _transform(self, X) -> pd.DataFrame:
100100
X = self.ordinal_encoder.transform(X)
101101
if self.handle_unknown == 'error':
102102
if X[self.cols].isin([-1]).any().any():
103-
raise ValueError('Columns to be encoded can not contain new values')
103+
raise ValueError('Columns to be encoded cannot contain new values')
104104

105105
X = self.transform_contrast_coding(X, mapping=self.mapping)
106106
return X
@@ -124,7 +124,7 @@ def fit_contrast_coding(
124124
handle_missing: str
125125
How to handle missing values.
126126
handle_unknown: str
127-
How to hande unkown values.
127+
How to handle unknown values.
128128
129129
Returns
130130
-------

category_encoders/basen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def _transform(self, X):
187187

188188
if self.handle_unknown == 'error':
189189
if X_out[self.cols].isin([-1]).any().any():
190-
raise ValueError('Columns to be encoded can not contain new values')
190+
raise ValueError('Columns to be encoded cannot contain new values')
191191

192192
X_out = self.basen_encode(X_out, cols=self.cols)
193193
return X_out

category_encoders/cat_boost.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ def _transform(self, X, y=None):
157157
is_unknown_value = X[col].isin(unseen_values.dropna().astype(object))
158158

159159
if self.handle_unknown == 'error' and is_unknown_value.any():
160-
raise ValueError('Columns to be encoded can not contain new values')
160+
raise ValueError('Columns to be encoded cannot contain new values')
161161

162162
if (
163163
y is None

category_encoders/glmm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class GLMMEncoder(util.SupervisedTransformerMixin, util.BaseEncoder):
2828
Mixed effects models are a mature branch of statistics.
2929
2. No hyper-parameters to tune. The amount of shrinkage is automatically determined
3030
through the estimation process. In short, the fewer observations a category has and/or
31-
the more the outcome varies for a category. Then the higher the regularization
31+
the more that outcome varies for a category, the higher the regularization
3232
towards "the prior" or "grand mean".
3333
3. The technique is applicable for both continuous and binomial targets.
3434
If the target is continuous, the encoder returns regularized difference of the

category_encoders/hashing.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -228,11 +228,11 @@ def hash_chunk(hash_method: str, np_df: np.ndarray, N: int) -> np.ndarray:
228228
for val in row:
229229
if val is not None:
230230
hasher = hasher_constructor()
231-
# Computes an integer index from the hasher digest. The endian is
232-
# "big" as the code use to read:
231+
# Computes an integer index from the hasher digest. The endianness is
232+
# "big" as the code used to read:
233233
# column_index = int(hasher.hexdigest(), 16) % N
234-
# which is implicitly considering the hexdigest to be big endian,
235-
# even if the system is little endian.
234+
# which is implicitly considering the hexdigest to be big-endian,
235+
# even if the system is little-endian.
236236
# Building the index that way is about 30% faster than using the
237237
# hexdigest.
238238
hasher.update(bytes(str(val), 'utf-8'))

category_encoders/leave_one_out.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def _fit_column_map(series: pd.Series, y: pd.Series) -> pd.Series:
161161
def transform_leave_one_out(self, X: pd.DataFrame, y: pd.Series | None, mapping=None):
162162
"""Apply leave-one-out-encoding to a dataframe.
163163
164-
If a target is given the lable-mean is calculated without the target (left out).
164+
If a target is given the label-mean is calculated without the target (left out).
165165
Otherwise, the label mean from the fit step is taken.
166166
"""
167167
random_state_ = check_random_state(self.random_state)
@@ -184,7 +184,7 @@ def transform_leave_one_out(self, X: pd.DataFrame, y: pd.Series | None, mapping=
184184
X[col] = X[col].astype(index_dtype)
185185

186186
if self.handle_unknown == 'error' and is_unknown_value.any():
187-
raise ValueError('Columns to be encoded can not contain new values')
187+
raise ValueError('Columns to be encoded cannot contain new values')
188188

189189
if (
190190
y is None

category_encoders/one_hot.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class OneHotEncoder(util.UnsupervisedTransformerMixin, util.BaseEncoder):
4040
handle_missing: str
4141
options are 'error', 'return_nan', 'value', and 'indicator'. The default is 'value'.
4242
43-
'error' will raise a `ValueError` if missings are encountered.
43+
'error' will raise a `ValueError` if a missing value is encountered.
4444
'return_nan' will encode a missing value as `np.nan` in every dummy column.
4545
'value' will encode a missing value as 0 in every dummy column.
4646
'indicator' will treat missingness as its own category, adding an additional dummy column
@@ -227,7 +227,7 @@ def _transform(self, X):
227227

228228
if self.handle_unknown == 'error':
229229
if X[self.cols].isin([-1]).any().any():
230-
raise ValueError('Columns to be encoded can not contain new values')
230+
raise ValueError('Columns to be encoded cannot contain new values')
231231

232232
X = self.get_dummies(X)
233233
return X

category_encoders/rankhot.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ class RankHotEncoder(util.UnsupervisedTransformerMixin, util.BaseEncoder):
3232
handle_unknown: str
3333
options are 'error', 'value', 'return_nan'.
3434
The default is 'value'.
35-
'value': If an unknown label occurrs, it is represented as 0 array.
36-
'error': If an unknown label occurrs, error message is displayed.
37-
'return_nan': If an unknown label occurrs, np.nan is returned in all columns.
35+
'value': If an unknown label occurs, it is represented as 0 array.
36+
'error': If an unknown label occurs, error message is displayed.
37+
'return_nan': If an unknown label occurs, np.nan is returned in all columns.
3838
handle_missing: str
3939
options are 'error', 'value' and 'return_nan'. The default is 'value'.
4040
A missing value is also considered an unknown value in the final data set.
@@ -146,7 +146,7 @@ def _transform(self, X_in: pd.DataFrame, override_return_df: bool = False) -> pd
146146

147147
if self.handle_unknown == 'error':
148148
if X[self.cols].isin([-1]).any().any():
149-
raise ValueError('Columns to be encoded can not contain new values')
149+
raise ValueError('Columns to be encoded cannot contain new values')
150150

151151
for switch, _ordinal_switch in zip(
152152
self.mapping, self.ordinal_encoder.category_mapping, strict=False

category_encoders/target_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ def target_encode(self, X_in: pd.DataFrame) -> pd.DataFrame:
301301
"""Apply target encoding via encoder mapping."""
302302
X = X_in.copy(deep=True)
303303

304-
# Was not mapping extra columns as self.featuer_names_in did not include new column
304+
# Was not mapping extra columns as self.feature_names_in did not include new column
305305
for col in self.cols:
306306
X[col] = X[col].map(self.mapping[col])
307307

category_encoders/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ def fit(self, X: X_type, y: y_type | None = None, **kwargs):
469469

470470
if self.handle_missing == 'error':
471471
if X[self.cols].isna().any().any():
472-
raise ValueError('Columns to be encoded can not contain null')
472+
raise ValueError('Columns to be encoded cannot contain null')
473473

474474
self._fit(X, y, **kwargs)
475475

@@ -504,7 +504,7 @@ def _check_fit_inputs(self, X: X_type, y: y_type) -> None:
504504
def _check_transform_inputs(self, df: pd.DataFrame) -> None:
505505
if self.handle_missing == 'error':
506506
if df[self.cols].isna().any().any():
507-
raise ValueError('Columns to be encoded can not contain null')
507+
raise ValueError('Columns to be encoded cannot contain null')
508508

509509
if self._dim is None:
510510
raise NotFittedError('Must train encoder before it can be used to transform data.')
@@ -600,7 +600,7 @@ def __sklearn_tags__(self) -> EncoderTags:
600600
def transform(self, X: X_type, y: y_type | None = None, override_return_df: bool = False):
601601
"""Perform the transformation to new categorical data.
602602
603-
Some encoders behave differently on whether y is given or not.
603+
Some encoders behave differently on whether or not y is given.
604604
This is mainly due to regularisation in order to avoid overfitting.
605605
On training data transform should be called with y, on test data without.
606606

0 commit comments

Comments
 (0)