[2]:
import pandas as pd
from scipy.io import arff
from rulekit import RuleKit
from rulekit.regression import RuleRegressor
from rulekit.params import Measures

from rulexai.explainer import RuleExplainer

CPU

Read data

[3]:
dataset_path = "./data/cpu.arff"
# loadarff returns a pair; element 0 holds the records, which is all that is used here.
data = pd.DataFrame(arff.loadarff(dataset_path)[0])

# Object-dtype columns are decoded from UTF-8 byte strings to str, one column
# at a time (no whole-frame stack/unstack round trip), and the "?" placeholder
# is replaced by None so it is treated as a missing value.
for col in data.select_dtypes([object]).columns:
    data[col] = data[col].str.decode("utf-8").replace({"?": None})

# Features are every column except "class"; "class" is the regression target.
x = data.drop(["class"], axis=1)
y = data["class"]

Train RuleKit model

[11]:
# Initialise the RuleKit library; done here before the regressor is built.
RuleKit.init()

# The same quality measure (C2) is used for all three configurable stages:
# rule induction, rule pruning and voting.
quality_measure = Measures.C2
reg = RuleRegressor(
    voting_measure=quality_measure,
    pruning_measure=quality_measure,
    induction_measure=quality_measure,
)
# Kept as the last expression so the fitted regressor is echoed as cell output.
reg.fit(x, y)
[11]:
<rulekit.regression.RuleRegressor at 0x28bffccc670>

Rules

[12]:
# One output line per rule of the fitted model: the rule's text form,
# a space, then its statistics object.
for induced_rule in reg.model.rules:
    print(induced_rule, induced_rule.stats, sep=" ")
IF vendor = {formation} THEN class = {34} [34,34] (p = 5.0, n = 0.0, P = 6.0, N = 203.0, weight = 0.9166666666666667, pvalue = 0.0)
IF MMIN = <80, inf) AND MMAX = (-inf, 1750) THEN class = {18} [16.92,19.08] (p = 10.0, n = 1.0, P = 11.0, N = 198.0, weight = 0.8629476584022039, pvalue = 7.355108555449812e-21)
IF MMIN = <756, inf) AND MMAX = (-inf, 4250) AND CHMAX = <7, 22) AND CHMIN = (-inf, 3.50) THEN class = {32} [30.64,33.36] (p = 4.0, n = 1.0, P = 7.0, N = 202.0, weight = 0.6231258840169731, pvalue = 1.1803717269256882e-08)
IF MMIN = <756, inf) AND MMAX = (-inf, 4250) AND MYCT = (-inf, 232.50) AND CHMAX = <3.50, 22) AND CHMIN = (-inf, 3.50) THEN class = {29} [24.98,33.02] (p = 15.0, n = 3.0, P = 35.0, N = 174.0, weight = 0.5712917350848385, pvalue = 7.408462419973687e-25)
IF MMIN = (-inf, 1500) AND MMAX = <1500, 4250) AND MYCT = <94.50, inf) AND CHMAX = <2.50, 44) THEN class = {24} [21.77,26.23] (p = 18.0, n = 7.0, P = 23.0, N = 186.0, weight = 0.6108789153810191, pvalue = 1.183267277682215e-40)
IF MMAX = (-inf, 4750) THEN class = {24} [10.30,37.70] (p = 69.0, n = 2.0, P = 88.0, N = 121.0, weight = 0.8486424746828075, pvalue = 1.6425318084016525e-60)
IF MYCT = <87, inf) AND CHMAX = (-inf, 96) THEN class = {29} [1.17,56.83] (p = 107.0, n = 11.0, P = 124.0, N = 85.0, weight = 0.7179513877721673, pvalue = 1.3893662585668293e-64)
IF MMAX = <6150, 9240) AND MYCT = (-inf, 129) AND CACH = <2, 28) AND CHMAX = (-inf, 46) THEN class = {46} [43.77,48.23] (p = 9.0, n = 2.0, P = 13.0, N = 196.0, weight = 0.6821036106750392, pvalue = 1.023395667474569e-17)
IF MMIN = (-inf, 2150) AND MMAX = <5000, 9240) AND MYCT = (-inf, 146.50) AND CHMAX = <5.50, inf) THEN class = {46} [14.85,77.15] (p = 25.0, n = 1.0, P = 143.0, N = 66.0, weight = 0.5158687466379773, pvalue = 7.403283011266057e-14)
IF MMIN = <2310, 4500) AND MYCT = <31.50, 102.50) AND CACH = (-inf, 48) AND CHMAX = (-inf, 40) THEN class = {80} [57.27,102.73] (p = 12.0, n = 2.0, P = 34.0, N = 175.0, weight = 0.5610564225690277, pvalue = 1.34750514438087e-09)
IF MMIN = <640, 4500) AND MMAX = <7150, 24000) THEN class = {65} [36.20,93.80] (p = 60.0, n = 13.0, P = 68.0, N = 141.0, weight = 0.6927380687046022, pvalue = 2.2589525624983582e-39)
IF MYCT = <27.50, 44) AND CHMIN = (-inf, 10) THEN class = {253} [192.76,313.24] (p = 7.0, n = 3.0, P = 12.0, N = 197.0, weight = 0.5396996615905246, pvalue = 0.001963352522246969)
IF MMIN = <884, inf) AND MMAX = <9240, inf) AND CHMAX = <2.50, 88) AND CHMIN = (-inf, 14) THEN class = {117} [44.09,189.91] (p = 49.0, n = 11.0, P = 80.0, N = 129.0, weight = 0.5667708333333334, pvalue = 4.475942404933969e-11)
IF MMIN = <3000, inf) AND MMAX = <24000, 48000) AND CHMIN = <14, inf) THEN class = {381} [301.01,460.99] (p = 6.0, n = 1.0, P = 8.0, N = 201.0, weight = 0.7450248756218906, pvalue = 0.047637666066025854)
IF MMIN = (-inf, 24000) AND MMAX = <28000, inf) AND MYCT = (-inf, 95) AND CACH = (-inf, 192) THEN class = {341} [129.60,552.40] (p = 19.0, n = 3.0, P = 34.0, N = 175.0, weight = 0.6524789915966387, pvalue = 0.990671648706587)

RuleXAI

[13]:
# Wrap the fitted regressor together with the data it was trained on.
explainer = RuleExplainer(
    model=reg,
    X=x,
    y=y,
    type="regression",
)
# Run the explanation step; left as the last expression so its return value
# is displayed as the cell output.
explainer.explain()
[13]:
<rulexai.explainer.RuleExplainer at 0x28ba8c77b50>

Feature importance

[14]:
# Display the per-attribute importance table (columns: attributes, importances).
# NOTE(review): presumably populated by the earlier explainer.explain() call — confirm.
explainer.feature_importances_
[14]:
attributes importances
3 MMAX 4.014332
2 CHMIN 3.028757
6 vendor 0.916667
1 CHMAX 0.460550
0 CACH 0.289558
4 MMIN 0.167137
5 MYCT -1.233983

Condition importance

[15]:
# Display the per-condition importance table (columns: conditions, importances);
# each row is a single elementary condition such as "MMAX = (-inf, 4750.0)".
# NOTE(review): presumably populated by the earlier explainer.explain() call — confirm.
explainer.condition_importances_
[15]:
conditions importances
0 CHMIN = (-inf, 10.0) 2.127775
1 vendor = {formation} 0.916667
2 MMAX = (-inf, 4750.0) 0.848642
3 MMAX = (-inf, 1750.0) 0.827179
4 MYCT = <87.0, inf) 0.643064
5 MMAX = (-inf, 4250.0) 0.528220
6 MMAX = <7150.0, 24000.0) 0.481404
7 CHMIN = (-inf, 14.0) 0.402859
8 MMAX = <28000.0, inf) 0.381381
9 MMAX = <24000.0, 48000.0) 0.339882
10 MMAX = <6150.0, 9240.0) 0.307522
11 CHMIN = (-inf, 3.5) 0.260506
12 CHMIN = <14.0, inf) 0.237616
13 MMAX = <1500.0, 4250.0) 0.224479
14 MMIN = <640.0, 4500.0) 0.211334
15 MMAX = <5000.0, 9240.0) 0.198756
16 MMIN = (-inf, 1500.0) 0.198058
17 MMIN = (-inf, 2150.0) 0.185016
18 MYCT = <94.5, inf) 0.179675
19 CHMAX = <2.5, 88.0) 0.165561
20 CACH = (-inf, 48.0) 0.154017
21 CACH = <2.0, 28.0) 0.109025
22 MMIN = <2310.0, 4500.0) 0.090892
23 CHMAX = (-inf, 96.0) 0.074887
24 CHMAX = (-inf, 46.0) 0.066936
25 CHMAX = <7.0, 22.0) 0.062233
26 CHMAX = (-inf, 40.0) 0.059474
27 CHMAX = <3.5, 22.0) 0.056674
28 MMIN = (-inf, 24000.0) 0.054221
29 CHMAX = <2.5, 44.0) 0.049421
30 MMIN = <80.0, inf) 0.035768
31 CACH = (-inf, 192.0) 0.026516
32 MYCT = <31.5, 102.5) 0.026372
33 MMIN = <756.0, inf) 0.003292
34 MYCT = (-inf, 232.5) -0.033761
35 MMIN = <884.0, inf) -0.069930
36 CHMAX = <5.5, inf) -0.074637
37 MYCT = (-inf, 146.5) -0.117779
38 MMAX = <9240.0, inf) -0.123134
39 MYCT = (-inf, 129.0) -0.151520
40 MYCT = (-inf, 95.0) -0.191957
41 MMIN = <3000.0, inf) -0.541514
42 MYCT = <27.5, 44.0) -1.588076

Local explainability

[16]:
# Explain the first example of the dataset: pass its feature row and its
# target value (as a one-element Series labelled "class"), and request the plot.
explainer.local_explainability(
    x.iloc[0],
    y.to_frame().iloc[0],
    plot=True,
)
Example:
vendor    adviser
MYCT        125.0
MMIN        256.0
MMAX       6000.0
CACH        256.0
CHMIN        16.0
CHMAX       128.0
class       199.0
Name: 0, dtype: object

Rules that covers this example:
IF MMIN = (-inf, 2150.0) AND MMAX = <5000.0, 9240.0) AND MYCT = (-inf, 146.5) AND CHMAX = <5.5, inf) THEN class = {46.0}

Importances of the conditions from rules covering the example
                conditions  importances
0  MMAX = <5000.0, 9240.0)     0.198756
1    MMIN = (-inf, 2150.0)     0.185016
2       CHMAX = <5.5, inf)    -0.074637
3     MYCT = (-inf, 146.5)    -0.117779
../../_images/rst_tutorials_regression_15_1.png
[16]:
conditions importances
0 MMAX = <5000.0, 9240.0) 0.198756
1 MMIN = (-inf, 2150.0) 0.185016
2 CHMAX = <5.5, inf) -0.074637
3 MYCT = (-inf, 146.5) -0.117779