您现在的位置是：首页 > 大数据

当前栏目

Intel DAAL AI 加速——传统决策树和随机森林

AI 加速随机传统 Intel 森林决策树

时间：2023-09-14 09:11:52

# file: dt_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted  materials,  and
# your use of  them is  governed by the  express license  under which  they were
# provided to you (License).  Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute,  disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents  are provided as  is,  with no express
# or implied  warranties,  other  than those  that are  expressly stated  in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
## \example dt_cls_dense_batch.py

import os
import sys

from daal.algorithms.decision_tree.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
)
# Resolve the directory one level above this script's own directory and make
# sure it is on sys.path (at the front) so that `utils` can be imported.
_scriptDir = os.path.dirname(__file__)
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(_scriptDir)))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables

# Root folder of the example data sets, relative to the working directory
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters: every CSV lives in the 'batch' sub-folder
trainDatasetFileName, pruneDatasetFileName, testDatasetFileName = (
    os.path.join(DAAL_PREFIX, 'batch', csvName)
    for csvName in (
        'decision_tree_train.csv',
        'decision_tree_prune.csv',
        'decision_tree_test.csv',
    )
)

# Number of feature columns per observation and number of class labels
nFeatures = 5
nClasses = 5

# Module-level state shared between trainModel(), testModel() and printResults();
# each entry stays None until the corresponding step has run.
model = predictionResult = testGroundTruth = None


def trainModel():
    """Train the decision tree classifier and store it in the global ``model``.

    Reads the training CSV and the pruning CSV into numeric tables, passes
    both to the training algorithm and keeps the model from its result.
    """
    global model

    # --- Training set -------------------------------------------------------
    # Data source bound to the training .csv file
    trainSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # One table for the nFeatures feature columns, one for the single label
    # column, and a merged table combining the two for loading
    trainFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainLabels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    trainMerged = MergedNumericTable(trainFeatures, trainLabels)
    trainSource.loadDataBlock(trainMerged)

    # --- Pruning set --------------------------------------------------------
    # Same layout as above, read from the pruning .csv file
    pruneSource = FileDataSource(
        pruneDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    pruneFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    pruneLabels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    pruneMerged = MergedNumericTable(pruneFeatures, pruneLabels)
    pruneSource.loadDataBlock(pruneMerged)

    # --- Training -----------------------------------------------------------
    # Decision tree classification trainer for nClasses classes
    trainer = training.Batch(nClasses)

    # Hand over the training data/labels and the pruning data/labels
    trainer.input.set(classifier.training.data, trainFeatures)
    trainer.input.set(classifier.training.labels, trainLabels)
    trainer.input.setTable(training.dataForPruning, pruneFeatures)
    trainer.input.setTable(training.labelsForPruning, pruneLabels)

    # Run the training and keep the resulting model for testModel()
    model = trainer.compute().get(classifier.training.model)

def testModel():
    """Run the trained model on the test set.

    Stores the test labels in the global ``testGroundTruth`` and the
    prediction result object in the global ``predictionResult``. Uses the
    global ``model`` set by trainModel().
    """
    global testGroundTruth, predictionResult

    # Data source bound to the test .csv file
    testSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Feature table (nFeatures columns), label table (1 column) and the merged
    # table combining the two for loading
    testFeatures = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    testMerged = MergedNumericTable(testFeatures, testGroundTruth)
    testSource.loadDataBlock(testMerged)

    # Decision tree classification predictor with its default arguments
    predictor = prediction.Batch()

    # Hand over the test features and the trained model
    predictor.input.setTable(classifier.prediction.data, testFeatures)
    predictor.input.setModel(classifier.prediction.model, model)

    # Run the prediction; the result object is read later by printResults()
    predictionResult = predictor.compute()


def printResults():
    """Print the test labels next to the predicted labels (up to 20 rows requested)."""
    # Table of predicted labels taken from the prediction result object
    predictedLabels = predictionResult.get(classifier.prediction.prediction)

    printNumericTables(
        testGroundTruth, predictedLabels,
        "Ground truth", "Classification results",
        "Decision tree classification results (first 20 observations):",
        20, flt64=False
    )

if __name__ == "__main__":
    # Run the example end to end: train, predict, then print the comparison.
    # The order matters because each step reads globals set by the previous one.
    for step in (trainModel, testModel, printResults):
        step()

随机森林的示例代码如下：

# file: df_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted  materials,  and
# your use of  them is  governed by the  express license  under which  they were
# provided to you (License).  Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute,  disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents  are provided as  is,  with no express
# or implied  warranties,  other  than those  that are  expressly stated  in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
## \example df_cls_dense_batch.py

import os
import sys

from daal.algorithms import decision_forest
from daal.algorithms.decision_forest.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
    MergedNumericTable, features
)

# Resolve the directory one level above this script's own directory and make
# sure it is on sys.path (at the front) so that `utils` can be imported.
_scriptDir = os.path.dirname(__file__)
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(_scriptDir)))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables

# Root folder of the example data sets, relative to the working directory
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters: both CSV files live in the 'batch' sub-folder
trainDatasetFileName, testDatasetFileName = (
    os.path.join(DAAL_PREFIX, 'batch', csvName)
    for csvName in ('df_classification_train.csv', 'df_classification_test.csv')
)

# Number of feature columns per observation and number of class labels
nFeatures = 3
nClasses = 5

# Decision forest parameters
nTrees = 10
minObservationsInLeafNode = 8

# Module-level state shared between trainModel(), testModel() and printResults();
# each entry stays None until the corresponding step has run.
model = predictionResult = testGroundTruth = None


def trainModel():
    """Train the decision forest classifier and store it in the global ``model``.

    Reads the training CSV into numeric tables, marks the feature types in the
    data dictionary, trains the forest with the module-level parameters, and
    prints the variable importance and out-of-bag error tables.
    """
    global model

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)

    # Get the data dictionary and update it with additional information about the data.
    # (Named featureDict rather than `dict` so the builtin is not shadowed.)
    featureDict = trainData.getDictionary()

    # Add a feature type to the dictionary: columns 0 and 1 are continuous,
    # column 2 is categorical
    featureDict[0].featureType = features.DAAL_CONTINUOUS
    featureDict[1].featureType = features.DAAL_CONTINUOUS
    featureDict[2].featureType = features.DAAL_CATEGORICAL

    # Create an algorithm object to train the decision forest classification model
    algorithm = training.Batch(nClasses)
    algorithm.parameter.nTrees = nTrees
    algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    algorithm.parameter.featuresPerNode = nFeatures
    # Request MDI variable importance and the out-of-bag error; both are printed below
    algorithm.parameter.varImportance = decision_forest.training.MDI
    algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)

    # Train the decision forest classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)
    printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
    printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")

def testModel():
    """Run the trained decision forest on the test set.

    Stores the test labels in the global ``testGroundTruth`` and the
    prediction result object in the global ``predictionResult``. Uses the
    global ``model`` set by trainModel().
    """
    global testGroundTruth, predictionResult

    # Initialize FileDataSource<CSVFeatureManager> to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from input file
    testDataSource.loadDataBlock(mergedData)

    # Get the data dictionary and update it with additional information about the data.
    # (Named featureDict rather than `dict` so the builtin is not shadowed.)
    featureDict = testData.getDictionary()

    # Add a feature type to the dictionary, using the same types as trainModel()
    # sets: columns 0 and 1 are continuous, column 2 is categorical
    featureDict[0].featureType = features.DAAL_CONTINUOUS
    featureDict[1].featureType = features.DAAL_CONTINUOUS
    featureDict[2].featureType = features.DAAL_CATEGORICAL

    # Create algorithm objects for decision forest classification prediction with the default method
    algorithm = prediction.Batch(nClasses)

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    """Print the predicted labels and then the ground-truth labels (10 rows requested each)."""
    printNumericTable(
        predictionResult.get(classifier.prediction.prediction),
        "Decision forest prediction results (first 10 rows):",
        10
    )
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)

if __name__ == "__main__":
    # Run the example end to end: train, predict, then print the results.
    # The order matters because each step reads globals set by the previous one.
    for step in (trainModel, testModel, printResults):
        step()

猜你喜欢

不要编写太复杂的复合表达式
逃逸机器学习的安全检测——evadeML、malGAN、deep-pwning、foolbox、Gym-Malware，防御的话有Defense-GAN: Protecting Classifiers Against Adversarial Attacks Using Generative Models（生成式模型）
Flutter入门（五）--表单+单选/多选+日期+轮播+对话框
[Javascript] Use an Array of Promises with a For Await Of Loop
Flink的高可用集群环境
Linux 操作系统 CPU numa架构
FFmpeg分离（解封装）视频和音频
Testing - 软件测试知识梳理 - 基础概念
atitit. 文件上传带进度条 atiUP 设计 java c# php
在cvs中添加用户命令
Py之lime：lime库的简介、安装、使用方法之详细攻略
k8s部署rook-ceph
【hdu 6000】Wash
数据结构模版----单链表SimpleLinkList[不带头结点](C语言实现)
列举一下项目中使用的产品和技术
linux 安装docker 笔记
AutoCppHeader AutoHeader 自动根据CPP 或C文件来生成头文件。
SQL Server数据库高级进阶之锁实战演练

相关主题

AI教你学测试
未来与AI
【AI】卷积
百度AI
AI update
AI 竞赛2022来啦
AI与制药
AI工具的使用
AI：AI是什么？
AI资源
AI与RPA
AI进阶之路
该让AI 落地了

zl程序教程

当前栏目

Intel DAAL AI加速 ——传统决策树和随机森林

相关文章