erikhenriksson committed on
Commit
57b4170
·
verified ·
1 Parent(s): 77a8ece

Upload folder using huggingface_hub

Browse files
en_ID/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48dc684753e19792ce04cadeae9025441fc6507396eb93ab6dc740f87f1f3dad
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b92dd822e9fe0a9008577929e372d12b86acb64892c1d82b970ea8ef8266d7c0
3
  size 9055
en_ID/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for en_ID
2
  ========================================
3
 
4
  Language: en
5
  Register: ID
6
- Training Date: 2025-09-26 14:09:04
7
 
8
- Data Summary:
9
  - Total samples: 41071
10
- - Training samples: 32856
11
- - Test samples: 8215
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: 'sports', ''
17
- - Class distribution: {'sports': 983, '': 40088}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9984783931832014, 0.9987825292953888, 0.9975650585907777, 0.9989347131334653, 0.9981737939430833]
22
- - CV mean: 0.9984
23
- - CV std: 0.0005
24
- - CV confidence interval: 0.9984 ± 0.0010
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9988
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: en_ID
2
  ========================================
3
 
4
  Language: en
5
  Register: ID
6
+ Training Date: 2025-09-26 14:45:51
7
 
8
+ Dataset:
9
  - Total samples: 41071
10
+ - Training: 32856
11
+ - Test: 8215
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: 'sports' (983 samples)
17
+ - 1: '' (40088 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9985
21
+ - CV Std: 0.0004
22
+ - Test Accuracy: 0.9990
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
en_NA-nb-OP/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f83d56eccb7a610a0b57baa16d63af8839c571455358919a4d892bcf581a66e
3
  size 33671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf6213ec1998e9b7b840f60c5f2efee6d67b83b9a6fb83dbe1dbc0023edcfea
3
  size 33671
en_NA-nb-OP/training_details.txt CHANGED
@@ -1,39 +1,30 @@
1
- Training Details for en_NA-nb-OP
2
  ========================================
3
 
4
  Language: en
5
  Register: NA-nb-OP
6
- Training Date: 2025-09-26 14:08:46
7
 
8
- Data Summary:
9
  - Total samples: 1536
10
- - Training samples: 1228
11
- - Test samples: 308
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 4
16
- - Class names: '', 'culture', 'dining', 'lifestyle'
17
- - Class distribution: {'': 747, 'culture': 327, 'dining': 172, 'lifestyle': 290}
 
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9634146341463414, 0.959349593495935, 0.9634146341463414, 0.9877551020408163, 0.9755102040816327]
22
- - CV mean: 0.9699
23
- - CV std: 0.0104
24
- - CV confidence interval: 0.9699 ± 0.0209
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9643
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: en_NA-nb-OP
2
  ========================================
3
 
4
  Language: en
5
  Register: NA-nb-OP
6
+ Training Date: 2025-09-26 14:45:21
7
 
8
+ Dataset:
9
  - Total samples: 1536
10
+ - Training: 1228
11
+ - Test: 308
12
+ - Features: 1024
13
+ - Classes: 4
14
 
15
  Classes:
16
+ - 0: '' (747 samples)
17
+ - 1: 'culture' (327 samples)
18
+ - 2: 'dining' (172 samples)
19
+ - 3: 'lifestyle' (290 samples)
20
 
21
+ Performance:
22
+ - CV Mean: 0.9715
23
+ - CV Std: 0.0089
24
+ - Test Accuracy: 0.9578
 
 
25
 
26
+ Configuration:
 
 
 
27
  - Algorithm: Logistic Regression
28
  - Regularization (C): 1.0
29
+ - Feature scaling: NONE
30
  - Random state: 42
 
 
 
 
 
 
en_NA-nb/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97142d5962fb70803f26b449fa21ced30015b550de8604dc850ad50c07bee259
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca26eb969da61f5c5ed6b0d6293e19961b59f6175f3e9a1e33bee16f1028afbb
3
  size 9055
en_NA-nb/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for en_NA-nb
2
  ========================================
3
 
4
  Language: en
5
  Register: NA-nb
6
- Training Date: 2025-09-26 14:08:58
7
 
8
- Data Summary:
9
  - Total samples: 49026
10
- - Training samples: 39220
11
- - Test samples: 9806
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'comments'
17
- - Class distribution: {'': 46239, 'comments': 2787}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.989673635900051, 0.9886537480877103, 0.9903110657827638, 0.9899286078531362, 0.9903110657827638]
22
- - CV mean: 0.9898
23
- - CV std: 0.0006
24
- - CV confidence interval: 0.9898 ± 0.0012
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9897
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: en_NA-nb
2
  ========================================
3
 
4
  Language: en
5
  Register: NA-nb
6
+ Training Date: 2025-09-26 14:45:42
7
 
8
+ Dataset:
9
  - Total samples: 49026
10
+ - Training: 39220
11
+ - Test: 9806
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (46239 samples)
17
+ - 1: 'comments' (2787 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9912
21
+ - CV Std: 0.0004
22
+ - Test Accuracy: 0.9913
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
fi_ID-NA/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91641ec86de57b7e5bc916dd05cad1bc6189dcd58a67f4f3cc8516af6ac5b228
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa2a7c1041ee5eaa8356b7f207505f95d1d3737458f0e643bf36e65f971bf2c5
3
  size 9055
fi_ID-NA/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for fi_ID-NA
2
  ========================================
3
 
4
  Language: fi
5
  Register: ID-NA
6
- Training Date: 2025-09-26 14:06:57
7
 
8
- Data Summary:
9
  - Total samples: 1729
10
- - Training samples: 1383
11
- - Test samples: 346
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'comments'
17
- - Class distribution: {'': 389, 'comments': 1340}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9855595667870036, 0.9927797833935018, 0.9927797833935018, 0.9927536231884058, 0.9818840579710145]
22
- - CV mean: 0.9892
23
- - CV std: 0.0046
24
- - CV confidence interval: 0.9892 ± 0.0092
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9913
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: fi_ID-NA
2
  ========================================
3
 
4
  Language: fi
5
  Register: ID-NA
6
+ Training Date: 2025-09-26 14:43:17
7
 
8
+ Dataset:
9
  - Total samples: 1729
10
+ - Training: 1383
11
+ - Test: 346
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (389 samples)
17
+ - 1: 'comments' (1340 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9899
21
+ - CV Std: 0.0042
22
+ - Test Accuracy: 0.9884
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
fi_ID/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15dc245f640dd24687143cb21fdedf34968f9de671aec463f8bbc91bec7ea88f
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75059136f8cd4874c39a66b86e9713dcf35e4aece62074b70d43654caa4d95f9
3
  size 9055
fi_ID/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for fi_ID
2
  ========================================
3
 
4
  Language: fi
5
  Register: ID
6
- Training Date: 2025-09-26 14:08:45
7
 
8
- Data Summary:
9
  - Total samples: 34973
10
- - Training samples: 27978
11
- - Test samples: 6995
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'sports'
17
- - Class distribution: {'': 34495, 'sports': 478}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9980343102215868, 0.9987491065046462, 0.9992852037169406, 0.9987488829311886, 0.9978552278820375]
22
- - CV mean: 0.9985
23
- - CV std: 0.0005
24
- - CV confidence interval: 0.9985 ± 0.0010
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9986
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: fi_ID
2
  ========================================
3
 
4
  Language: fi
5
  Register: ID
6
+ Training Date: 2025-09-26 14:45:20
7
 
8
+ Dataset:
9
  - Total samples: 34973
10
+ - Training: 27978
11
+ - Test: 6995
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (34495 samples)
17
+ - 1: 'sports' (478 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9989
21
+ - CV Std: 0.0007
22
+ - Test Accuracy: 0.9983
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
fi_NA-nb-OP-rv/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83cfc695439d258effeb25e30a6677c1c0e18b8d07c3a4e1c36738eac4b800ac
3
  size 33671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25423d27a82c163563dc7e5da487792e1dadc5148016e6c292cfa0a0253c2c10
3
  size 33671
fi_NA-nb-OP-rv/training_details.txt CHANGED
@@ -1,39 +1,30 @@
1
- Training Details for fi_NA-nb-OP-rv
2
  ========================================
3
 
4
  Language: fi
5
  Register: NA-nb-OP-rv
6
- Training Date: 2025-09-26 14:06:58
7
 
8
- Data Summary:
9
  - Total samples: 4929
10
- - Training samples: 3943
11
- - Test samples: 986
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 4
16
- - Class names: 'books', 'dining', 'beverages', 'cosmetics'
17
- - Class distribution: {'books': 2958, 'dining': 294, 'beverages': 283, 'cosmetics': 1394}
 
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9898605830164765, 0.9936628643852978, 0.9936628643852978, 0.9885786802030457, 0.9911167512690355]
22
- - CV mean: 0.9914
23
- - CV std: 0.0020
24
- - CV confidence interval: 0.9914 ± 0.0041
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9899
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: fi_NA-nb-OP-rv
2
  ========================================
3
 
4
  Language: fi
5
  Register: NA-nb-OP-rv
6
+ Training Date: 2025-09-26 14:43:21
7
 
8
+ Dataset:
9
  - Total samples: 4929
10
+ - Training: 3943
11
+ - Test: 986
12
+ - Features: 1024
13
+ - Classes: 4
14
 
15
  Classes:
16
+ - 0: 'books' (2958 samples)
17
+ - 1: 'dining' (294 samples)
18
+ - 2: 'beverages' (283 samples)
19
+ - 3: 'cosmetics' (1394 samples)
20
 
21
+ Performance:
22
+ - CV Mean: 0.9937
23
+ - CV Std: 0.0029
24
+ - Test Accuracy: 0.9909
 
 
25
 
26
+ Configuration:
 
 
 
27
  - Algorithm: Logistic Regression
28
  - Regularization (C): 1.0
29
+ - Feature scaling: NONE
30
  - Random state: 42
 
 
 
 
 
 
fi_NA-nb-OP/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f00c094692e5bb83931d54deef16ef5f7b807f0537d15a1ed6345acab21e88f
3
  size 25471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e7bd2415df7824f65aaee2a05720b955d26bfd9b6b5b69cb5da65f7c5bbbfdf
3
  size 25471
fi_NA-nb-OP/training_details.txt CHANGED
@@ -1,39 +1,29 @@
1
- Training Details for fi_NA-nb-OP
2
  ========================================
3
 
4
  Language: fi
5
  Register: NA-nb-OP
6
- Training Date: 2025-09-26 14:06:57
7
 
8
- Data Summary:
9
  - Total samples: 1969
10
- - Training samples: 1575
11
- - Test samples: 394
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 3
16
- - Class names: '', 'culture', 'consumption'
17
- - Class distribution: {'': 874, 'culture': 695, 'consumption': 400}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9841269841269841, 0.9936507936507937, 0.9841269841269841, 0.9777777777777777, 0.9650793650793651]
22
- - CV mean: 0.9810
23
- - CV std: 0.0094
24
- - CV confidence interval: 0.9810 ± 0.0188
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9848
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: fi_NA-nb-OP
2
  ========================================
3
 
4
  Language: fi
5
  Register: NA-nb-OP
6
+ Training Date: 2025-09-26 14:43:17
7
 
8
+ Dataset:
9
  - Total samples: 1969
10
+ - Training: 1575
11
+ - Test: 394
12
+ - Features: 1024
13
+ - Classes: 3
14
 
15
  Classes:
16
+ - 0: '' (874 samples)
17
+ - 1: 'culture' (695 samples)
18
+ - 2: 'consumption' (400 samples)
19
 
20
+ Performance:
21
+ - CV Mean: 0.9848
22
+ - CV Std: 0.0071
23
+ - Test Accuracy: 0.9848
 
 
24
 
25
+ Configuration:
 
 
 
26
  - Algorithm: Logistic Regression
27
  - Regularization (C): 1.0
28
+ - Feature scaling: NONE
29
  - Random state: 42
 
 
 
 
 
 
fi_NA-nb/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73957b464aecf86217f464d4db098528d7f7447904f721044e0a93987372e896
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e06ea9312851ec2dd78fa0b532960a7149031b2a33fd19608185d2830264237
3
  size 9055
fi_NA-nb/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for fi_NA-nb
2
  ========================================
3
 
4
  Language: fi
5
  Register: NA-nb
6
- Training Date: 2025-09-26 14:08:40
7
 
8
- Data Summary:
9
  - Total samples: 218088
10
- - Training samples: 174470
11
- - Test samples: 43618
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'comments'
17
- - Class distribution: {'': 193558, 'comments': 24530}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9926061787126726, 0.9932939760417264, 0.9922622800481458, 0.9933512924858142, 0.992778128044936]
22
- - CV mean: 0.9929
23
- - CV std: 0.0004
24
- - CV confidence interval: 0.9929 ± 0.0008
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9927
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: fi_NA-nb
2
  ========================================
3
 
4
  Language: fi
5
  Register: NA-nb
6
+ Training Date: 2025-09-26 14:45:14
7
 
8
+ Dataset:
9
  - Total samples: 218088
10
+ - Training: 174470
11
+ - Test: 43618
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (193558 samples)
17
+ - 1: 'comments' (24530 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9930
21
+ - CV Std: 0.0005
22
+ - Test Accuracy: 0.9934
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
modeling_sm_subgroup_classifier.py CHANGED
@@ -35,7 +35,7 @@ class SmSubgroupClassifier(PreTrainedModel):
35
  item_path = os.path.join(self.model_dir, item)
36
  if os.path.isdir(item_path):
37
  # Verify it's a valid model directory
38
- required_files = ["model.pkl", "scaler.pkl", "metadata.pkl"]
39
  if all(
40
  os.path.exists(os.path.join(item_path, f)) for f in required_files
41
  ):
@@ -60,13 +60,11 @@ class SmSubgroupClassifier(PreTrainedModel):
60
 
61
  # Load components
62
  classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
63
- scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))
64
  with open(os.path.join(classifier_path, "metadata.pkl"), "rb") as f:
65
  metadata = pickle.load(f)
66
 
67
  classifier_info = {
68
  "classifier": classifier,
69
- "scaler": scaler,
70
  "class_names": metadata["class_names"],
71
  }
72
 
@@ -92,10 +90,9 @@ class SmSubgroupClassifier(PreTrainedModel):
92
  # Load classifier
93
  classifier_info = self._load_classifier(model_key)
94
 
95
- # Scale and predict
96
- embeddings_scaled = classifier_info["scaler"].transform(embeddings)
97
- predictions = classifier_info["classifier"].predict(embeddings_scaled)
98
- probabilities = classifier_info["classifier"].predict_proba(embeddings_scaled)
99
 
100
  # Format results - just use class names and probabilities
101
  results = []
 
35
  item_path = os.path.join(self.model_dir, item)
36
  if os.path.isdir(item_path):
37
  # Verify it's a valid model directory
38
+ required_files = ["model.pkl", "metadata.pkl"]
39
  if all(
40
  os.path.exists(os.path.join(item_path, f)) for f in required_files
41
  ):
 
60
 
61
  # Load components
62
  classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
 
63
  with open(os.path.join(classifier_path, "metadata.pkl"), "rb") as f:
64
  metadata = pickle.load(f)
65
 
66
  classifier_info = {
67
  "classifier": classifier,
 
68
  "class_names": metadata["class_names"],
69
  }
70
 
 
90
  # Load classifier
91
  classifier_info = self._load_classifier(model_key)
92
 
93
+ # Predict directly without scaling
94
+ predictions = classifier_info["classifier"].predict(embeddings)
95
+ probabilities = classifier_info["classifier"].predict_proba(embeddings)
 
96
 
97
  # Format results - just use class names and probabilities
98
  results = []
sv_HI-NA-nb-re/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b36afa0b2d6b74474f7ec1ae3779322e637f64af1bfe522083fe356a22a194
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb3ad2ab765d6810ff066fb512c4723a2bc4fdf4022dad67aa9d40aa71f5cf03
3
  size 9055
sv_HI-NA-nb-re/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_HI-NA-nb-re
2
  ========================================
3
 
4
  Language: sv
5
  Register: HI-NA-nb-re
6
- Training Date: 2025-09-26 14:05:16
7
 
8
- Data Summary:
9
  - Total samples: 6804
10
- - Training samples: 5443
11
- - Test samples: 1361
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: 'crafts', ''
17
- - Class distribution: {'crafts': 135, '': 6669}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9963269054178145, 0.9963269054178145, 0.9972451790633609, 0.9944852941176471, 0.9954044117647058]
22
- - CV mean: 0.9960
23
- - CV std: 0.0009
24
- - CV confidence interval: 0.9960 ± 0.0019
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9956
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_HI-NA-nb-re
2
  ========================================
3
 
4
  Language: sv
5
  Register: HI-NA-nb-re
6
+ Training Date: 2025-09-26 14:40:55
7
 
8
+ Dataset:
9
  - Total samples: 6804
10
+ - Training: 5443
11
+ - Test: 1361
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: 'crafts' (135 samples)
17
+ - 1: '' (6669 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9960
21
+ - CV Std: 0.0007
22
+ - Test Accuracy: 0.9963
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_ID-NA-nb/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3e6a7f1c0e31e315d84e43bc75299c48fe02215b7def50de7fc350316f807cb
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb1f2cf418daafa57df0949e537e0edcfb147a459f8e2a5207cad14300efe5da
3
  size 9055
sv_ID-NA-nb/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_ID-NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: ID-NA-nb
6
- Training Date: 2025-09-26 14:05:20
7
 
8
- Data Summary:
9
  - Total samples: 1189
10
- - Training samples: 951
11
- - Test samples: 238
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'comments'
17
- - Class distribution: {'': 852, 'comments': 337}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9214659685863874, 0.9526315789473684, 0.9315789473684211, 0.968421052631579, 0.9263157894736842]
22
- - CV mean: 0.9401
23
- - CV std: 0.0177
24
- - CV confidence interval: 0.9401 ± 0.0354
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9706
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_ID-NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: ID-NA-nb
6
+ Training Date: 2025-09-26 14:40:59
7
 
8
+ Dataset:
9
  - Total samples: 1189
10
+ - Training: 951
11
+ - Test: 238
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (852 samples)
17
+ - 1: 'comments' (337 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9485
21
+ - CV Std: 0.0146
22
+ - Test Accuracy: 0.9622
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_ID/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f22126201e503512b9bbf51ed4c310d6e59ae89dd12e37835154c15a382d2612
3
  size 25471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e34073c68bb3052b47f48110ef469aabf92e455e7015962344503b1b253c587
3
  size 25471
sv_ID/training_details.txt CHANGED
@@ -1,39 +1,29 @@
1
- Training Details for sv_ID
2
  ========================================
3
 
4
  Language: sv
5
  Register: ID
6
- Training Date: 2025-09-26 14:06:57
7
 
8
- Data Summary:
9
  - Total samples: 15060
10
- - Training samples: 12048
11
- - Test samples: 3012
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 3
16
- - Class names: '', 'sports', 'help'
17
- - Class distribution: {'': 14108, 'sports': 542, 'help': 410}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9962655601659751, 0.9954356846473029, 0.9983402489626556, 1.0, 0.9991697799916978]
22
- - CV mean: 0.9978
23
- - CV std: 0.0017
24
- - CV confidence interval: 0.9978 ± 0.0035
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9977
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_ID
2
  ========================================
3
 
4
  Language: sv
5
  Register: ID
6
+ Training Date: 2025-09-26 14:43:16
7
 
8
+ Dataset:
9
  - Total samples: 15060
10
+ - Training: 12048
11
+ - Test: 3012
12
+ - Features: 1024
13
+ - Classes: 3
14
 
15
  Classes:
16
+ - 0: '' (14108 samples)
17
+ - 1: 'sports' (542 samples)
18
+ - 2: 'help' (410 samples)
19
 
20
+ Performance:
21
+ - CV Mean: 0.9980
22
+ - CV Std: 0.0011
23
+ - Test Accuracy: 0.9990
 
 
24
 
25
+ Configuration:
 
 
 
26
  - Algorithm: Logistic Regression
27
  - Regularization (C): 1.0
28
+ - Feature scaling: NONE
29
  - Random state: 42
 
 
 
 
 
 
sv_IN-NA-nb/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3247c4a22da33aab41f2e67513baa5a91675fb48eca867774eb3eb23c08bb827
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b47ecae021e4c0ab2da743cd76b431e165a134894a540d43b9c98f7e595aacb5
3
  size 9055
sv_IN-NA-nb/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_IN-NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: IN-NA-nb
6
- Training Date: 2025-09-26 14:05:18
7
 
8
- Data Summary:
9
  - Total samples: 1169
10
- - Training samples: 935
11
- - Test samples: 234
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'organizations'
17
- - Class distribution: {'': 811, 'organizations': 358}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.93048128342246, 0.9893048128342246, 0.9625668449197861, 0.9679144385026738, 0.9625668449197861]
22
- - CV mean: 0.9626
23
- - CV std: 0.0188
24
- - CV confidence interval: 0.9626 ± 0.0377
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9744
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_IN-NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: IN-NA-nb
6
+ Training Date: 2025-09-26 14:40:58
7
 
8
+ Dataset:
9
  - Total samples: 1169
10
+ - Training: 935
11
+ - Test: 234
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (811 samples)
17
+ - 1: 'organizations' (358 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9668
21
+ - CV Std: 0.0201
22
+ - Test Accuracy: 0.9701
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_NA-nb-OP-rv/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:682ff2cca0e847dba7ab531629e37efb7ad74f9ec844a584863d6ff6494608ca
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:201f20021cf1391061d7ab0884b67e90e4c73342374b4114c614d53da9562519
3
  size 9055
sv_NA-nb-OP-rv/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_NA-nb-OP-rv
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-nb-OP-rv
6
- Training Date: 2025-09-26 14:05:20
7
 
8
- Data Summary:
9
  - Total samples: 9422
10
- - Training samples: 7537
11
- - Test samples: 1885
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: 'lifestyle', 'culture'
17
- - Class distribution: {'lifestyle': 2731, 'culture': 6691}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9927055702917772, 0.9953580901856764, 0.9940278699402787, 0.9960185799601858, 0.9946914399469144]
22
- - CV mean: 0.9946
23
- - CV std: 0.0011
24
- - CV confidence interval: 0.9946 ± 0.0023
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9952
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_NA-nb-OP-rv
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-nb-OP-rv
6
+ Training Date: 2025-09-26 14:40:59
7
 
8
+ Dataset:
9
  - Total samples: 9422
10
+ - Training: 7537
11
+ - Test: 1885
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: 'lifestyle' (2731 samples)
17
+ - 1: 'culture' (6691 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9952
21
+ - CV Std: 0.0013
22
+ - Test Accuracy: 0.9968
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_NA-nb-OP/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720fc8f577288692a18f9ea11fde030737470791c5f91fc0b1c298d03f673fd1
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:282933f93c4b4fc9b72eff6fc541c9705e201a3c704b09ca5c1a7746eba25696
3
  size 9055
sv_NA-nb-OP/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_NA-nb-OP
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-nb-OP
6
- Training Date: 2025-09-26 14:05:18
7
 
8
- Data Summary:
9
  - Total samples: 6280
10
- - Training samples: 5024
11
- - Test samples: 1256
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: 'finance', ''
17
- - Class distribution: {'finance': 149, '': 6131}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9960199004975124, 0.9950248756218906, 0.9980099502487563, 0.9960199004975124, 0.9950199203187251]
22
- - CV mean: 0.9960
23
- - CV std: 0.0011
24
- - CV confidence interval: 0.9960 ± 0.0022
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9960
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_NA-nb-OP
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-nb-OP
6
+ Training Date: 2025-09-26 14:40:58
7
 
8
+ Dataset:
9
  - Total samples: 6280
10
+ - Training: 5024
11
+ - Test: 1256
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: 'finance' (149 samples)
17
+ - 1: '' (6131 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9946
21
+ - CV Std: 0.0024
22
+ - Test Accuracy: 0.9968
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_NA-nb/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe83ba75d6108719e90f50a6eef1aa4012c90e1fb8563ae31c098bc5608d7ac
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:defe8303b0eb2cfbcb1969d215b6fa25040d9438448196b1f70075bea6d1531e
3
  size 9055
sv_NA-nb/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-nb
6
- Training Date: 2025-09-26 14:06:54
7
 
8
- Data Summary:
9
  - Total samples: 222925
10
- - Training samples: 178340
11
- - Test samples: 44585
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: '', 'comments'
17
- - Class distribution: {'': 203958, 'comments': 18967}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.99290680722216, 0.9922339351799933, 0.9935516429292363, 0.9936918246046876, 0.993523606594146]
22
- - CV mean: 0.9932
23
- - CV std: 0.0005
24
- - CV confidence interval: 0.9932 ± 0.0011
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9940
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-nb
6
+ Training Date: 2025-09-26 14:43:11
7
 
8
+ Dataset:
9
  - Total samples: 222925
10
+ - Training: 178340
11
+ - Test: 44585
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: '' (203958 samples)
17
+ - 1: 'comments' (18967 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9939
21
+ - CV Std: 0.0003
22
+ - Test Accuracy: 0.9943
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_NA-ob-OP/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4508c12d8e95a73bc9d7e742f2b67e1bb5051b4480937f8597f94bf95d782cd3
3
  size 9055
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d13112969c2a8c82c9f3bb7e1649ffff9953c86d09d82bbc337ecd51200c939
3
  size 9055
sv_NA-ob-OP/training_details.txt CHANGED
@@ -1,39 +1,28 @@
1
- Training Details for sv_NA-ob-OP
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-ob-OP
6
- Training Date: 2025-09-26 14:05:17
7
 
8
- Data Summary:
9
  - Total samples: 2830
10
- - Training samples: 2264
11
- - Test samples: 566
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 2
16
- - Class names: 'sports', 'general'
17
- - Class distribution: {'sports': 175, 'general': 2655}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9933774834437086, 0.9933774834437086, 0.9955849889624724, 0.9955849889624724, 0.9911504424778761]
22
- - CV mean: 0.9938
23
- - CV std: 0.0017
24
- - CV confidence interval: 0.9938 ± 0.0033
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9929
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_NA-ob-OP
2
  ========================================
3
 
4
  Language: sv
5
  Register: NA-ob-OP
6
+ Training Date: 2025-09-26 14:40:57
7
 
8
+ Dataset:
9
  - Total samples: 2830
10
+ - Training: 2264
11
+ - Test: 566
12
+ - Features: 1024
13
+ - Classes: 2
14
 
15
  Classes:
16
+ - 0: 'sports' (175 samples)
17
+ - 1: 'general' (2655 samples)
 
18
 
19
+ Performance:
20
+ - CV Mean: 0.9934
21
+ - CV Std: 0.0028
22
+ - Test Accuracy: 0.9912
 
 
23
 
24
+ Configuration:
 
 
 
25
  - Algorithm: Logistic Regression
26
  - Regularization (C): 1.0
27
+ - Feature scaling: NONE
28
  - Random state: 42
 
 
 
 
 
 
sv_ds-IP-NA-nb/model.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:984ebee2366c6b4ea8c0883fd66e5095f4fbeacb91fbea6ecd5b9dc90a184c99
3
  size 25471
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:009e52f53327a30dfba1b53d8b883f5a72d89da81c5bec95f18cce24999b95bb
3
  size 25471
sv_ds-IP-NA-nb/training_details.txt CHANGED
@@ -1,39 +1,29 @@
1
- Training Details for sv_ds-IP-NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: ds-IP-NA-nb
6
- Training Date: 2025-09-26 14:05:17
7
 
8
- Data Summary:
9
  - Total samples: 2357
10
- - Training samples: 1885
11
- - Test samples: 472
12
- - Embedding dimension: 1024
 
13
 
14
  Classes:
15
- - Number of classes: 3
16
- - Class names: '', 'travel', 'contests'
17
- - Class distribution: {'': 1933, 'travel': 268, 'contests': 156}
18
 
19
- Cross-Validation Results:
20
- - CV folds: 5
21
- - CV scores: [0.9761273209549072, 0.9840848806366048, 0.9840848806366048, 0.9761273209549072, 0.9893899204244032]
22
- - CV mean: 0.9820
23
- - CV std: 0.0051
24
- - CV confidence interval: 0.9820 ± 0.0103
25
 
26
- Final Performance:
27
- - Test accuracy: 0.9788
28
-
29
- Model Configuration:
30
  - Algorithm: Logistic Regression
31
  - Regularization (C): 1.0
32
- - Feature scaling: StandardScaler
33
  - Random state: 42
34
-
35
- Files:
36
- - Classifier: model.pkl
37
- - Scaler: scaler.pkl
38
- - Metadata: metadata.pkl
39
- - This file: training_details.txt
 
1
+ Training Details: sv_ds-IP-NA-nb
2
  ========================================
3
 
4
  Language: sv
5
  Register: ds-IP-NA-nb
6
+ Training Date: 2025-09-26 14:40:56
7
 
8
+ Dataset:
9
  - Total samples: 2357
10
+ - Training: 1885
11
+ - Test: 472
12
+ - Features: 1024
13
+ - Classes: 3
14
 
15
  Classes:
16
+ - 0: '' (1933 samples)
17
+ - 1: 'travel' (268 samples)
18
+ - 2: 'contests' (156 samples)
19
 
20
+ Performance:
21
+ - CV Mean: 0.9830
22
+ - CV Std: 0.0036
23
+ - Test Accuracy: 0.9831
 
 
24
 
25
+ Configuration:
 
 
 
26
  - Algorithm: Logistic Regression
27
  - Regularization (C): 1.0
28
+ - Feature scaling: NONE
29
  - Random state: 42