Upload folder using huggingface_hub
Browse files
- en_ID/model.pkl +1 -1
- en_ID/training_details.txt +15 -26
- en_NA-nb-OP/model.pkl +1 -1
- en_NA-nb-OP/training_details.txt +17 -26
- en_NA-nb/model.pkl +1 -1
- en_NA-nb/training_details.txt +15 -26
- fi_ID-NA/model.pkl +1 -1
- fi_ID-NA/training_details.txt +15 -26
- fi_ID/model.pkl +1 -1
- fi_ID/training_details.txt +15 -26
- fi_NA-nb-OP-rv/model.pkl +1 -1
- fi_NA-nb-OP-rv/training_details.txt +17 -26
- fi_NA-nb-OP/model.pkl +1 -1
- fi_NA-nb-OP/training_details.txt +16 -26
- fi_NA-nb/model.pkl +1 -1
- fi_NA-nb/training_details.txt +15 -26
- modeling_sm_subgroup_classifier.py +4 -7
- sv_HI-NA-nb-re/model.pkl +1 -1
- sv_HI-NA-nb-re/training_details.txt +15 -26
- sv_ID-NA-nb/model.pkl +1 -1
- sv_ID-NA-nb/training_details.txt +15 -26
- sv_ID/model.pkl +1 -1
- sv_ID/training_details.txt +16 -26
- sv_IN-NA-nb/model.pkl +1 -1
- sv_IN-NA-nb/training_details.txt +15 -26
- sv_NA-nb-OP-rv/model.pkl +1 -1
- sv_NA-nb-OP-rv/training_details.txt +15 -26
- sv_NA-nb-OP/model.pkl +1 -1
- sv_NA-nb-OP/training_details.txt +15 -26
- sv_NA-nb/model.pkl +1 -1
- sv_NA-nb/training_details.txt +15 -26
- sv_NA-ob-OP/model.pkl +1 -1
- sv_NA-ob-OP/training_details.txt +15 -26
- sv_ds-IP-NA-nb/model.pkl +1 -1
- sv_ds-IP-NA-nb/training_details.txt +16 -26
en_ID/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b92dd822e9fe0a9008577929e372d12b86acb64892c1d82b970ea8ef8266d7c0
|
| 3 |
size 9055
|
en_ID/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: en
|
| 5 |
Register: ID
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 41071
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'sports': 983, '': 40088}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0005
|
| 24 |
-
- CV confidence interval: 0.9984 ± 0.0010
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9988
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: en_ID
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: en
|
| 5 |
Register: ID
|
| 6 |
+
Training Date: 2025-09-26 14:45:51
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 41071
|
| 10 |
+
- Training: 32856
|
| 11 |
+
- Test: 8215
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: 'sports' (983 samples)
|
| 17 |
+
- 1: '' (40088 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9985
|
| 21 |
+
- CV Std: 0.0004
|
| 22 |
+
- Test Accuracy: 0.9990
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
en_NA-nb-OP/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 33671
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdf6213ec1998e9b7b840f60c5f2efee6d67b83b9a6fb83dbe1dbc0023edcfea
|
| 3 |
size 33671
|
en_NA-nb-OP/training_details.txt
CHANGED
|
@@ -1,39 +1,30 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: en
|
| 5 |
Register: NA-nb-OP
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 1536
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
-
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0104
|
| 24 |
-
- CV confidence interval: 0.9699 ± 0.0209
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9643
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: en_NA-nb-OP
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: en
|
| 5 |
Register: NA-nb-OP
|
| 6 |
+
Training Date: 2025-09-26 14:45:21
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 1536
|
| 10 |
+
- Training: 1228
|
| 11 |
+
- Test: 308
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 4
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (747 samples)
|
| 17 |
+
- 1: 'culture' (327 samples)
|
| 18 |
+
- 2: 'dining' (172 samples)
|
| 19 |
+
- 3: 'lifestyle' (290 samples)
|
| 20 |
|
| 21 |
+
Performance:
|
| 22 |
+
- CV Mean: 0.9715
|
| 23 |
+
- CV Std: 0.0089
|
| 24 |
+
- Test Accuracy: 0.9578
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 27 |
- Algorithm: Logistic Regression
|
| 28 |
- Regularization (C): 1.0
|
| 29 |
+
- Feature scaling: NONE
|
| 30 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
en_NA-nb/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca26eb969da61f5c5ed6b0d6293e19961b59f6175f3e9a1e33bee16f1028afbb
|
| 3 |
size 9055
|
en_NA-nb/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: en
|
| 5 |
Register: NA-nb
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 49026
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 46239, 'comments': 2787}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0006
|
| 24 |
-
- CV confidence interval: 0.9898 ± 0.0012
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9897
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: en_NA-nb
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: en
|
| 5 |
Register: NA-nb
|
| 6 |
+
Training Date: 2025-09-26 14:45:42
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 49026
|
| 10 |
+
- Training: 39220
|
| 11 |
+
- Test: 9806
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (46239 samples)
|
| 17 |
+
- 1: 'comments' (2787 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9912
|
| 21 |
+
- CV Std: 0.0004
|
| 22 |
+
- Test Accuracy: 0.9913
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fi_ID-NA/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa2a7c1041ee5eaa8356b7f207505f95d1d3737458f0e643bf36e65f971bf2c5
|
| 3 |
size 9055
|
fi_ID-NA/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: ID-NA
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 1729
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 389, 'comments': 1340}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0046
|
| 24 |
-
- CV confidence interval: 0.9892 ± 0.0092
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9913
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: fi_ID-NA
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: ID-NA
|
| 6 |
+
Training Date: 2025-09-26 14:43:17
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 1729
|
| 10 |
+
- Training: 1383
|
| 11 |
+
- Test: 346
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (389 samples)
|
| 17 |
+
- 1: 'comments' (1340 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9899
|
| 21 |
+
- CV Std: 0.0042
|
| 22 |
+
- Test Accuracy: 0.9884
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fi_ID/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75059136f8cd4874c39a66b86e9713dcf35e4aece62074b70d43654caa4d95f9
|
| 3 |
size 9055
|
fi_ID/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: ID
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 34973
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 34495, 'sports': 478}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0005
|
| 24 |
-
- CV confidence interval: 0.9985 ± 0.0010
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9986
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: fi_ID
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: ID
|
| 6 |
+
Training Date: 2025-09-26 14:45:20
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 34973
|
| 10 |
+
- Training: 27978
|
| 11 |
+
- Test: 6995
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (34495 samples)
|
| 17 |
+
- 1: 'sports' (478 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9989
|
| 21 |
+
- CV Std: 0.0007
|
| 22 |
+
- Test Accuracy: 0.9983
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fi_NA-nb-OP-rv/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 33671
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25423d27a82c163563dc7e5da487792e1dadc5148016e6c292cfa0a0253c2c10
|
| 3 |
size 33671
|
fi_NA-nb-OP-rv/training_details.txt
CHANGED
|
@@ -1,39 +1,30 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: NA-nb-OP-rv
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 4929
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
-
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0020
|
| 24 |
-
- CV confidence interval: 0.9914 ± 0.0041
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9899
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: fi_NA-nb-OP-rv
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: NA-nb-OP-rv
|
| 6 |
+
Training Date: 2025-09-26 14:43:21
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 4929
|
| 10 |
+
- Training: 3943
|
| 11 |
+
- Test: 986
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 4
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: 'books' (2958 samples)
|
| 17 |
+
- 1: 'dining' (294 samples)
|
| 18 |
+
- 2: 'beverages' (283 samples)
|
| 19 |
+
- 3: 'cosmetics' (1394 samples)
|
| 20 |
|
| 21 |
+
Performance:
|
| 22 |
+
- CV Mean: 0.9937
|
| 23 |
+
- CV Std: 0.0029
|
| 24 |
+
- Test Accuracy: 0.9909
|
|
|
|
|
|
|
| 25 |
|
| 26 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 27 |
- Algorithm: Logistic Regression
|
| 28 |
- Regularization (C): 1.0
|
| 29 |
+
- Feature scaling: NONE
|
| 30 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fi_NA-nb-OP/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 25471
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e7bd2415df7824f65aaee2a05720b955d26bfd9b6b5b69cb5da65f7c5bbbfdf
|
| 3 |
size 25471
|
fi_NA-nb-OP/training_details.txt
CHANGED
|
@@ -1,39 +1,29 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: NA-nb-OP
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 1969
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0094
|
| 24 |
-
- CV confidence interval: 0.9810 ± 0.0188
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9848
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: fi_NA-nb-OP
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: NA-nb-OP
|
| 6 |
+
Training Date: 2025-09-26 14:43:17
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 1969
|
| 10 |
+
- Training: 1575
|
| 11 |
+
- Test: 394
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 3
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (874 samples)
|
| 17 |
+
- 1: 'culture' (695 samples)
|
| 18 |
+
- 2: 'consumption' (400 samples)
|
| 19 |
|
| 20 |
+
Performance:
|
| 21 |
+
- CV Mean: 0.9848
|
| 22 |
+
- CV Std: 0.0071
|
| 23 |
+
- Test Accuracy: 0.9848
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 26 |
- Algorithm: Logistic Regression
|
| 27 |
- Regularization (C): 1.0
|
| 28 |
+
- Feature scaling: NONE
|
| 29 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fi_NA-nb/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e06ea9312851ec2dd78fa0b532960a7149031b2a33fd19608185d2830264237
|
| 3 |
size 9055
|
fi_NA-nb/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: NA-nb
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 218088
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 193558, 'comments': 24530}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0004
|
| 24 |
-
- CV confidence interval: 0.9929 ± 0.0008
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9927
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: fi_NA-nb
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: fi
|
| 5 |
Register: NA-nb
|
| 6 |
+
Training Date: 2025-09-26 14:45:14
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 218088
|
| 10 |
+
- Training: 174470
|
| 11 |
+
- Test: 43618
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (193558 samples)
|
| 17 |
+
- 1: 'comments' (24530 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9930
|
| 21 |
+
- CV Std: 0.0005
|
| 22 |
+
- Test Accuracy: 0.9934
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modeling_sm_subgroup_classifier.py
CHANGED
|
@@ -35,7 +35,7 @@ class SmSubgroupClassifier(PreTrainedModel):
|
|
| 35 |
item_path = os.path.join(self.model_dir, item)
|
| 36 |
if os.path.isdir(item_path):
|
| 37 |
# Verify it's a valid model directory
|
| 38 |
-
required_files = ["model.pkl", "scaler.pkl", "metadata.pkl"]
|
| 39 |
if all(
|
| 40 |
os.path.exists(os.path.join(item_path, f)) for f in required_files
|
| 41 |
):
|
|
@@ -60,13 +60,11 @@ class SmSubgroupClassifier(PreTrainedModel):
|
|
| 60 |
|
| 61 |
# Load components
|
| 62 |
classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
|
| 63 |
-
scaler = joblib.load(os.path.join(classifier_path, "scaler.pkl"))
|
| 64 |
with open(os.path.join(classifier_path, "metadata.pkl"), "rb") as f:
|
| 65 |
metadata = pickle.load(f)
|
| 66 |
|
| 67 |
classifier_info = {
|
| 68 |
"classifier": classifier,
|
| 69 |
-
"scaler": scaler,
|
| 70 |
"class_names": metadata["class_names"],
|
| 71 |
}
|
| 72 |
|
|
@@ -92,10 +90,9 @@ class SmSubgroupClassifier(PreTrainedModel):
|
|
| 92 |
# Load classifier
|
| 93 |
classifier_info = self._load_classifier(model_key)
|
| 94 |
|
| 95 |
-
#
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
probabilities = classifier_info["classifier"].predict_proba(embeddings_scaled)
|
| 99 |
|
| 100 |
# Format results - just use class names and probabilities
|
| 101 |
results = []
|
|
|
|
| 35 |
item_path = os.path.join(self.model_dir, item)
|
| 36 |
if os.path.isdir(item_path):
|
| 37 |
# Verify it's a valid model directory
|
| 38 |
+
required_files = ["model.pkl", "metadata.pkl"]
|
| 39 |
if all(
|
| 40 |
os.path.exists(os.path.join(item_path, f)) for f in required_files
|
| 41 |
):
|
|
|
|
| 60 |
|
| 61 |
# Load components
|
| 62 |
classifier = joblib.load(os.path.join(classifier_path, "model.pkl"))
|
|
|
|
| 63 |
with open(os.path.join(classifier_path, "metadata.pkl"), "rb") as f:
|
| 64 |
metadata = pickle.load(f)
|
| 65 |
|
| 66 |
classifier_info = {
|
| 67 |
"classifier": classifier,
|
|
|
|
| 68 |
"class_names": metadata["class_names"],
|
| 69 |
}
|
| 70 |
|
|
|
|
| 90 |
# Load classifier
|
| 91 |
classifier_info = self._load_classifier(model_key)
|
| 92 |
|
| 93 |
+
# Predict directly without scaling
|
| 94 |
+
predictions = classifier_info["classifier"].predict(embeddings)
|
| 95 |
+
probabilities = classifier_info["classifier"].predict_proba(embeddings)
|
|
|
|
| 96 |
|
| 97 |
# Format results - just use class names and probabilities
|
| 98 |
results = []
|
sv_HI-NA-nb-re/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb3ad2ab765d6810ff066fb512c4723a2bc4fdf4022dad67aa9d40aa71f5cf03
|
| 3 |
size 9055
|
sv_HI-NA-nb-re/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: HI-NA-nb-re
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 6804
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'crafts': 135, '': 6669}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0009
|
| 24 |
-
- CV confidence interval: 0.9960 ± 0.0019
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9956
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_HI-NA-nb-re
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: HI-NA-nb-re
|
| 6 |
+
Training Date: 2025-09-26 14:40:55
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 6804
|
| 10 |
+
- Training: 5443
|
| 11 |
+
- Test: 1361
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: 'crafts' (135 samples)
|
| 17 |
+
- 1: '' (6669 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9960
|
| 21 |
+
- CV Std: 0.0007
|
| 22 |
+
- Test Accuracy: 0.9963
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_ID-NA-nb/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb1f2cf418daafa57df0949e537e0edcfb147a459f8e2a5207cad14300efe5da
|
| 3 |
size 9055
|
sv_ID-NA-nb/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: ID-NA-nb
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 1189
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 852, 'comments': 337}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0177
|
| 24 |
-
- CV confidence interval: 0.9401 ± 0.0354
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9706
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_ID-NA-nb
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: ID-NA-nb
|
| 6 |
+
Training Date: 2025-09-26 14:40:59
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 1189
|
| 10 |
+
- Training: 951
|
| 11 |
+
- Test: 238
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (852 samples)
|
| 17 |
+
- 1: 'comments' (337 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9485
|
| 21 |
+
- CV Std: 0.0146
|
| 22 |
+
- Test Accuracy: 0.9622
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_ID/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 25471
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e34073c68bb3052b47f48110ef469aabf92e455e7015962344503b1b253c587
|
| 3 |
size 25471
|
sv_ID/training_details.txt
CHANGED
|
@@ -1,39 +1,29 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: ID
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 15060
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0017
|
| 24 |
-
- CV confidence interval: 0.9978 ± 0.0035
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9977
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_ID
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: ID
|
| 6 |
+
Training Date: 2025-09-26 14:43:16
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 15060
|
| 10 |
+
- Training: 12048
|
| 11 |
+
- Test: 3012
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 3
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (14108 samples)
|
| 17 |
+
- 1: 'sports' (542 samples)
|
| 18 |
+
- 2: 'help' (410 samples)
|
| 19 |
|
| 20 |
+
Performance:
|
| 21 |
+
- CV Mean: 0.9980
|
| 22 |
+
- CV Std: 0.0011
|
| 23 |
+
- Test Accuracy: 0.9990
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 26 |
- Algorithm: Logistic Regression
|
| 27 |
- Regularization (C): 1.0
|
| 28 |
+
- Feature scaling: NONE
|
| 29 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_IN-NA-nb/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b47ecae021e4c0ab2da743cd76b431e165a134894a540d43b9c98f7e595aacb5
|
| 3 |
size 9055
|
sv_IN-NA-nb/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: IN-NA-nb
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 1169
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 811, 'organizations': 358}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0188
|
| 24 |
-
- CV confidence interval: 0.9626 ± 0.0377
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9744
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_IN-NA-nb
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: IN-NA-nb
|
| 6 |
+
Training Date: 2025-09-26 14:40:58
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 1169
|
| 10 |
+
- Training: 935
|
| 11 |
+
- Test: 234
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (811 samples)
|
| 17 |
+
- 1: 'organizations' (358 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9668
|
| 21 |
+
- CV Std: 0.0201
|
| 22 |
+
- Test Accuracy: 0.9701
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_NA-nb-OP-rv/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:201f20021cf1391061d7ab0884b67e90e4c73342374b4114c614d53da9562519
|
| 3 |
size 9055
|
sv_NA-nb-OP-rv/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-nb-OP-rv
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 9422
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'lifestyle': 2731, 'culture': 6691}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0011
|
| 24 |
-
- CV confidence interval: 0.9946 ± 0.0023
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9952
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_NA-nb-OP-rv
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-nb-OP-rv
|
| 6 |
+
Training Date: 2025-09-26 14:40:59
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 9422
|
| 10 |
+
- Training: 7537
|
| 11 |
+
- Test: 1885
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: 'lifestyle' (2731 samples)
|
| 17 |
+
- 1: 'culture' (6691 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9952
|
| 21 |
+
- CV Std: 0.0013
|
| 22 |
+
- Test Accuracy: 0.9968
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_NA-nb-OP/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:282933f93c4b4fc9b72eff6fc541c9705e201a3c704b09ca5c1a7746eba25696
|
| 3 |
size 9055
|
sv_NA-nb-OP/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-nb-OP
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 6280
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'finance': 149, '': 6131}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0011
|
| 24 |
-
- CV confidence interval: 0.9960 ± 0.0022
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9960
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_NA-nb-OP
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-nb-OP
|
| 6 |
+
Training Date: 2025-09-26 14:40:58
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 6280
|
| 10 |
+
- Training: 5024
|
| 11 |
+
- Test: 1256
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: 'finance' (149 samples)
|
| 17 |
+
- 1: '' (6131 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9946
|
| 21 |
+
- CV Std: 0.0024
|
| 22 |
+
- Test Accuracy: 0.9968
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_NA-nb/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:defe8303b0eb2cfbcb1969d215b6fa25040d9438448196b1f70075bea6d1531e
|
| 3 |
size 9055
|
sv_NA-nb/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-nb
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 222925
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'': 203958, 'comments': 18967}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0005
|
| 24 |
-
- CV confidence interval: 0.9932 ± 0.0011
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9940
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_NA-nb
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-nb
|
| 6 |
+
Training Date: 2025-09-26 14:43:11
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 222925
|
| 10 |
+
- Training: 178340
|
| 11 |
+
- Test: 44585
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (203958 samples)
|
| 17 |
+
- 1: 'comments' (18967 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9939
|
| 21 |
+
- CV Std: 0.0003
|
| 22 |
+
- Test Accuracy: 0.9943
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_NA-ob-OP/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 9055
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d13112969c2a8c82c9f3bb7e1649ffff9953c86d09d82bbc337ecd51200c939
|
| 3 |
size 9055
|
sv_NA-ob-OP/training_details.txt
CHANGED
|
@@ -1,39 +1,28 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-ob-OP
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 2830
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
- Class distribution: {'sports': 175, 'general': 2655}
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0017
|
| 24 |
-
- CV confidence interval: 0.9938 ± 0.0033
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9929
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_NA-ob-OP
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: NA-ob-OP
|
| 6 |
+
Training Date: 2025-09-26 14:40:57
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 2830
|
| 10 |
+
- Training: 2264
|
| 11 |
+
- Test: 566
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 2
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: 'sports' (175 samples)
|
| 17 |
+
- 1: 'general' (2655 samples)
|
|
|
|
| 18 |
|
| 19 |
+
Performance:
|
| 20 |
+
- CV Mean: 0.9934
|
| 21 |
+
- CV Std: 0.0028
|
| 22 |
+
- Test Accuracy: 0.9912
|
|
|
|
|
|
|
| 23 |
|
| 24 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 25 |
- Algorithm: Logistic Regression
|
| 26 |
- Regularization (C): 1.0
|
| 27 |
+
- Feature scaling: NONE
|
| 28 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sv_ds-IP-NA-nb/model.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 25471
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:009e52f53327a30dfba1b53d8b883f5a72d89da81c5bec95f18cce24999b95bb
|
| 3 |
size 25471
|
sv_ds-IP-NA-nb/training_details.txt
CHANGED
|
@@ -1,39 +1,29 @@
|
|
| 1 |
-
Training Details
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: ds-IP-NA-nb
|
| 6 |
-
Training Date: 2025-09-26 14:
|
| 7 |
|
| 8 |
-
|
| 9 |
- Total samples: 2357
|
| 10 |
-
- Training
|
| 11 |
-
- Test
|
| 12 |
-
-
|
|
|
|
| 13 |
|
| 14 |
Classes:
|
| 15 |
-
-
|
| 16 |
-
-
|
| 17 |
-
-
|
| 18 |
|
| 19 |
-
|
| 20 |
-
- CV
|
| 21 |
-
- CV
|
| 22 |
-
-
|
| 23 |
-
- CV std: 0.0051
|
| 24 |
-
- CV confidence interval: 0.9820 ± 0.0103
|
| 25 |
|
| 26 |
-
|
| 27 |
-
- Test accuracy: 0.9788
|
| 28 |
-
|
| 29 |
-
Model Configuration:
|
| 30 |
- Algorithm: Logistic Regression
|
| 31 |
- Regularization (C): 1.0
|
| 32 |
-
- Feature scaling:
|
| 33 |
- Random state: 42
|
| 34 |
-
|
| 35 |
-
Files:
|
| 36 |
-
- Classifier: model.pkl
|
| 37 |
-
- Scaler: scaler.pkl
|
| 38 |
-
- Metadata: metadata.pkl
|
| 39 |
-
- This file: training_details.txt
|
|
|
|
| 1 |
+
Training Details: sv_ds-IP-NA-nb
|
| 2 |
========================================
|
| 3 |
|
| 4 |
Language: sv
|
| 5 |
Register: ds-IP-NA-nb
|
| 6 |
+
Training Date: 2025-09-26 14:40:56
|
| 7 |
|
| 8 |
+
Dataset:
|
| 9 |
- Total samples: 2357
|
| 10 |
+
- Training: 1885
|
| 11 |
+
- Test: 472
|
| 12 |
+
- Features: 1024
|
| 13 |
+
- Classes: 3
|
| 14 |
|
| 15 |
Classes:
|
| 16 |
+
- 0: '' (1933 samples)
|
| 17 |
+
- 1: 'travel' (268 samples)
|
| 18 |
+
- 2: 'contests' (156 samples)
|
| 19 |
|
| 20 |
+
Performance:
|
| 21 |
+
- CV Mean: 0.9830
|
| 22 |
+
- CV Std: 0.0036
|
| 23 |
+
- Test Accuracy: 0.9831
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
Configuration:
|
|
|
|
|
|
|
|
|
|
| 26 |
- Algorithm: Logistic Regression
|
| 27 |
- Regularization (C): 1.0
|
| 28 |
+
- Feature scaling: NONE
|
| 29 |
- Random state: 42
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|