[2]:
import pandas as pd
from scipy.io import arff
from rulekit import RuleKit
from rulekit.regression import RuleRegressor
from rulekit.params import Measures
from rulexai.explainer import RuleExplainer
CPU
Read data
[3]:
# Load the CPU dataset from its ARFF file; only the first element returned by
# loadarff (the records) is used to build the frame.
dataset_path = "./data/cpu.arff"
data = pd.DataFrame(arff.loadarff(dataset_path)[0])

# Object-typed columns are decoded to UTF-8 text, and the "?" placeholder is
# replaced by None. Decoding column by column avoids the stack()/unstack()
# round trip, whose legacy implementation is deprecated since pandas 2.1, and
# also works when the frame has no object columns at all.
for col in data.select_dtypes([object]).columns:
    data[col] = data[col].str.decode("utf-8").replace({"?": None})

# Split into features (everything except "class") and the target column.
x = data.drop(["class"], axis=1)
y = data["class"]
Train RuleKit model
[11]:
# RuleKit
RuleKit.init()

# The same quality measure (C2) is used for induction, pruning and voting.
quality_measure = Measures.C2
reg = RuleRegressor(
    induction_measure=quality_measure,
    pruning_measure=quality_measure,
    voting_measure=quality_measure,
)

# Kept as the cell's last expression so the value returned by fit is displayed.
reg.fit(x, y)
[11]:
<rulekit.regression.RuleRegressor at 0x28bffccc670>
Rules
[12]:
# Print each rule of the fitted model followed by its statistics, one per line.
for induced_rule in reg.model.rules:
    rule_stats = induced_rule.stats
    print(induced_rule, rule_stats)
IF vendor = {formation} THEN class = {34} [34,34] (p = 5.0, n = 0.0, P = 6.0, N = 203.0, weight = 0.9166666666666667, pvalue = 0.0)
IF MMIN = <80, inf) AND MMAX = (-inf, 1750) THEN class = {18} [16.92,19.08] (p = 10.0, n = 1.0, P = 11.0, N = 198.0, weight = 0.8629476584022039, pvalue = 7.355108555449812e-21)
IF MMIN = <756, inf) AND MMAX = (-inf, 4250) AND CHMAX = <7, 22) AND CHMIN = (-inf, 3.50) THEN class = {32} [30.64,33.36] (p = 4.0, n = 1.0, P = 7.0, N = 202.0, weight = 0.6231258840169731, pvalue = 1.1803717269256882e-08)
IF MMIN = <756, inf) AND MMAX = (-inf, 4250) AND MYCT = (-inf, 232.50) AND CHMAX = <3.50, 22) AND CHMIN = (-inf, 3.50) THEN class = {29} [24.98,33.02] (p = 15.0, n = 3.0, P = 35.0, N = 174.0, weight = 0.5712917350848385, pvalue = 7.408462419973687e-25)
IF MMIN = (-inf, 1500) AND MMAX = <1500, 4250) AND MYCT = <94.50, inf) AND CHMAX = <2.50, 44) THEN class = {24} [21.77,26.23] (p = 18.0, n = 7.0, P = 23.0, N = 186.0, weight = 0.6108789153810191, pvalue = 1.183267277682215e-40)
IF MMAX = (-inf, 4750) THEN class = {24} [10.30,37.70] (p = 69.0, n = 2.0, P = 88.0, N = 121.0, weight = 0.8486424746828075, pvalue = 1.6425318084016525e-60)
IF MYCT = <87, inf) AND CHMAX = (-inf, 96) THEN class = {29} [1.17,56.83] (p = 107.0, n = 11.0, P = 124.0, N = 85.0, weight = 0.7179513877721673, pvalue = 1.3893662585668293e-64)
IF MMAX = <6150, 9240) AND MYCT = (-inf, 129) AND CACH = <2, 28) AND CHMAX = (-inf, 46) THEN class = {46} [43.77,48.23] (p = 9.0, n = 2.0, P = 13.0, N = 196.0, weight = 0.6821036106750392, pvalue = 1.023395667474569e-17)
IF MMIN = (-inf, 2150) AND MMAX = <5000, 9240) AND MYCT = (-inf, 146.50) AND CHMAX = <5.50, inf) THEN class = {46} [14.85,77.15] (p = 25.0, n = 1.0, P = 143.0, N = 66.0, weight = 0.5158687466379773, pvalue = 7.403283011266057e-14)
IF MMIN = <2310, 4500) AND MYCT = <31.50, 102.50) AND CACH = (-inf, 48) AND CHMAX = (-inf, 40) THEN class = {80} [57.27,102.73] (p = 12.0, n = 2.0, P = 34.0, N = 175.0, weight = 0.5610564225690277, pvalue = 1.34750514438087e-09)
IF MMIN = <640, 4500) AND MMAX = <7150, 24000) THEN class = {65} [36.20,93.80] (p = 60.0, n = 13.0, P = 68.0, N = 141.0, weight = 0.6927380687046022, pvalue = 2.2589525624983582e-39)
IF MYCT = <27.50, 44) AND CHMIN = (-inf, 10) THEN class = {253} [192.76,313.24] (p = 7.0, n = 3.0, P = 12.0, N = 197.0, weight = 0.5396996615905246, pvalue = 0.001963352522246969)
IF MMIN = <884, inf) AND MMAX = <9240, inf) AND CHMAX = <2.50, 88) AND CHMIN = (-inf, 14) THEN class = {117} [44.09,189.91] (p = 49.0, n = 11.0, P = 80.0, N = 129.0, weight = 0.5667708333333334, pvalue = 4.475942404933969e-11)
IF MMIN = <3000, inf) AND MMAX = <24000, 48000) AND CHMIN = <14, inf) THEN class = {381} [301.01,460.99] (p = 6.0, n = 1.0, P = 8.0, N = 201.0, weight = 0.7450248756218906, pvalue = 0.047637666066025854)
IF MMIN = (-inf, 24000) AND MMAX = <28000, inf) AND MYCT = (-inf, 95) AND CACH = (-inf, 192) THEN class = {341} [129.60,552.40] (p = 19.0, n = 3.0, P = 34.0, N = 175.0, weight = 0.6524789915966387, pvalue = 0.990671648706587)
RuleXAI
[13]:
# Build a RuleXAI explainer around the fitted regressor `reg`, using the
# feature frame `x` and target `y`, in regression mode.
explainer = RuleExplainer(model=reg, X=x, y=y, type="regression")
# Left as the cell's last expression, so the returned object's repr is shown.
# NOTE(review): presumably this is what populates the importance tables
# inspected in the following cells; confirm against the rulexai docs.
explainer.explain()
[13]:
<rulexai.explainer.RuleExplainer at 0x28ba8c77b50>
Feature importance
[14]:
# Display the explainer's feature-importance table (one row per attribute).
explainer.feature_importances_
[14]:
attributes | importances | |
---|---|---|
3 | MMAX | 4.014332 |
2 | CHMIN | 3.028757 |
6 | vendor | 0.916667 |
1 | CHMAX | 0.460550 |
0 | CACH | 0.289558 |
4 | MMIN | 0.167137 |
5 | MYCT | -1.233983 |
Condition importance
[15]:
# Display the explainer's condition-importance table (one row per elementary
# rule condition).
explainer.condition_importances_
[15]:
conditions | importances | |
---|---|---|
0 | CHMIN = (-inf, 10.0) | 2.127775 |
1 | vendor = {formation} | 0.916667 |
2 | MMAX = (-inf, 4750.0) | 0.848642 |
3 | MMAX = (-inf, 1750.0) | 0.827179 |
4 | MYCT = <87.0, inf) | 0.643064 |
5 | MMAX = (-inf, 4250.0) | 0.528220 |
6 | MMAX = <7150.0, 24000.0) | 0.481404 |
7 | CHMIN = (-inf, 14.0) | 0.402859 |
8 | MMAX = <28000.0, inf) | 0.381381 |
9 | MMAX = <24000.0, 48000.0) | 0.339882 |
10 | MMAX = <6150.0, 9240.0) | 0.307522 |
11 | CHMIN = (-inf, 3.5) | 0.260506 |
12 | CHMIN = <14.0, inf) | 0.237616 |
13 | MMAX = <1500.0, 4250.0) | 0.224479 |
14 | MMIN = <640.0, 4500.0) | 0.211334 |
15 | MMAX = <5000.0, 9240.0) | 0.198756 |
16 | MMIN = (-inf, 1500.0) | 0.198058 |
17 | MMIN = (-inf, 2150.0) | 0.185016 |
18 | MYCT = <94.5, inf) | 0.179675 |
19 | CHMAX = <2.5, 88.0) | 0.165561 |
20 | CACH = (-inf, 48.0) | 0.154017 |
21 | CACH = <2.0, 28.0) | 0.109025 |
22 | MMIN = <2310.0, 4500.0) | 0.090892 |
23 | CHMAX = (-inf, 96.0) | 0.074887 |
24 | CHMAX = (-inf, 46.0) | 0.066936 |
25 | CHMAX = <7.0, 22.0) | 0.062233 |
26 | CHMAX = (-inf, 40.0) | 0.059474 |
27 | CHMAX = <3.5, 22.0) | 0.056674 |
28 | MMIN = (-inf, 24000.0) | 0.054221 |
29 | CHMAX = <2.5, 44.0) | 0.049421 |
30 | MMIN = <80.0, inf) | 0.035768 |
31 | CACH = (-inf, 192.0) | 0.026516 |
32 | MYCT = <31.5, 102.5) | 0.026372 |
33 | MMIN = <756.0, inf) | 0.003292 |
34 | MYCT = (-inf, 232.5) | -0.033761 |
35 | MMIN = <884.0, inf) | -0.069930 |
36 | CHMAX = <5.5, inf) | -0.074637 |
37 | MYCT = (-inf, 146.5) | -0.117779 |
38 | MMAX = <9240.0, inf) | -0.123134 |
39 | MYCT = (-inf, 129.0) | -0.151520 |
40 | MYCT = (-inf, 95.0) | -0.191957 |
41 | MMIN = <3000.0, inf) | -0.541514 |
42 | MYCT = <27.5, 44.0) | -1.588076 |
Local explainability
[16]:
# Explain the first example: its feature row and the matching target row are
# selected by position and passed to the explainer with plotting enabled.
first_example = x.iloc[0, :]
first_target = pd.DataFrame(y).iloc[0, :]
explainer.local_explainability(first_example, first_target, plot=True)
Example:
vendor adviser
MYCT 125.0
MMIN 256.0
MMAX 6000.0
CACH 256.0
CHMIN 16.0
CHMAX 128.0
class 199.0
Name: 0, dtype: object
Rules that covers this example:
IF MMIN = (-inf, 2150.0) AND MMAX = <5000.0, 9240.0) AND MYCT = (-inf, 146.5) AND CHMAX = <5.5, inf) THEN class = {46.0}
Importances of the conditions from rules covering the example
conditions importances
0 MMAX = <5000.0, 9240.0) 0.198756
1 MMIN = (-inf, 2150.0) 0.185016
2 CHMAX = <5.5, inf) -0.074637
3 MYCT = (-inf, 146.5) -0.117779
[16]:
conditions | importances | |
---|---|---|
0 | MMAX = <5000.0, 9240.0) | 0.198756 |
1 | MMIN = (-inf, 2150.0) | 0.185016 |
2 | CHMAX = <5.5, inf) | -0.074637 |
3 | MYCT = (-inf, 146.5) | -0.117779 |