Intel DAAL AI加速 ——传统决策树和随机森林
AI 加速 随机 传统 Intel 森林 决策树
时间:2023-09-14 09:11:52
# file: dt_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-DT_CLS_DENSE_BATCH"></a>
## \example dt_cls_dense_batch.py

import os
import sys

from daal.algorithms.decision_tree.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable, MergedNumericTable
)

# Make the examples' shared ``utils`` module (one directory up) importable.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_train.csv')
pruneDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_prune.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'decision_tree_test.csv')

nFeatures = 5
nClasses = 5

# Model object for the decision tree classification algorithm
model = None
predictionResult = None
testGroundTruth = None


def _loadLabeledCsv(fileName, nDataColumns):
    """Load a .csv file into a (data, labels) pair of numeric tables.

    The file is read through a MergedNumericTable built from a data table with
    ``nDataColumns`` columns followed by a single-column labels table.

    NOTE(review): this presumes each row of the file is laid out as the feature
    columns followed by one label column -- confirm against the data sets.

    :param fileName: path of the .csv file to read
    :param nDataColumns: number of columns of the data table
    :return: tuple ``(data, labels)`` of HomogenNumericTable objects
    """
    # Initialize FileDataSource<CSVFeatureManager> to retrieve the data from a .csv file
    dataSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for data and labels; both are created with zero
    # rows and the notAllocate flag, and are filled by loadDataBlock() below
    data = HomogenNumericTable(nDataColumns, 0, NumericTableIface.notAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, labels)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)
    return data, labels


def trainModel():
    """Train the decision tree classifier and store it in the global ``model``.

    Reads the training and pruning data sets, passes both to the training
    algorithm and keeps the resulting model for testModel().
    """
    global model

    trainData, trainGroundTruth = _loadLabeledCsv(trainDatasetFileName, nFeatures)
    pruneData, pruneGroundTruth = _loadLabeledCsv(pruneDatasetFileName, nFeatures)

    # Create an algorithm object to train the decision tree classification model
    algorithm = training.Batch(nClasses)

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)
    algorithm.input.setTable(training.dataForPruning, pruneData)
    algorithm.input.setTable(training.labelsForPruning, pruneGroundTruth)

    # Train the decision tree classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)


def testModel():
    """Run prediction on the test data set with the global ``model``.

    Stores the test labels in the global ``testGroundTruth`` and the prediction
    result in the global ``predictionResult``. trainModel() is expected to have
    been called first so that ``model`` is set.
    """
    global testGroundTruth, predictionResult

    testData, testGroundTruth = _loadLabeledCsv(testDatasetFileName, nFeatures)

    # Create algorithm objects for decision tree classification prediction with the default method
    algorithm = prediction.Batch()

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    """Print the ground-truth labels next to the predicted labels."""
    printNumericTables(
        testGroundTruth,
        predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "Decision tree classification results (first 20 observations):",
        20, flt64=False
    )


if __name__ == "__main__":
    trainModel()
    testModel()
    printResults()
随机森林的示例代码:
# file: df_cls_dense_batch.py
#===============================================================================
# Copyright 2014-2018 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and
# your use of them is governed by the express license under which they were
# provided to you (License). Unless the License provides otherwise, you may not
# use, modify, copy, publish, distribute, disclose or transmit this software or
# the related documents without Intel's prior written permission.
#
# This software and the related documents are provided as is, with no express
# or implied warranties, other than those that are expressly stated in the
# License.
#===============================================================================

## <a name="DAAL-EXAMPLE-PY-DF_CLS_DENSE_BATCH"></a>
## \example df_cls_dense_batch.py

import os
import sys

from daal.algorithms import decision_forest
from daal.algorithms.decision_forest.classification import prediction, training
from daal.algorithms import classifier
from daal.data_management import (
    FileDataSource, DataSourceIface, NumericTableIface, HomogenNumericTable,
    MergedNumericTable, features
)

# Make the examples' shared ``utils`` module (one directory up) importable.
utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable, printNumericTables

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
trainDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_train.csv')
testDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'df_classification_test.csv')

nFeatures = 3
nClasses = 5

# Decision forest parameters
nTrees = 10
minObservationsInLeafNode = 8

# Model object for the decision forest classification algorithm
model = None
predictionResult = None
testGroundTruth = None


def _loadLabeledCsv(fileName, nDataColumns):
    """Load a .csv file into a (data, labels) pair of numeric tables.

    The file is read through a MergedNumericTable built from a data table with
    ``nDataColumns`` columns followed by a single-column labels table.

    NOTE(review): this presumes each row of the file is laid out as the feature
    columns followed by one label column -- confirm against the data sets.

    :param fileName: path of the .csv file to read
    :param nDataColumns: number of columns of the data table
    :return: tuple ``(data, labels)`` of HomogenNumericTable objects
    """
    # Initialize FileDataSource<CSVFeatureManager> to retrieve the data from a .csv file
    dataSource = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for data and labels; both are created with zero
    # rows and the notAllocate flag, and are filled by loadDataBlock() below
    data = HomogenNumericTable(nDataColumns, 0, NumericTableIface.notAllocate)
    labels = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, labels)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)
    return data, labels


def _setFeatureTypes(table):
    """Tag the three feature columns of ``table`` in its data dictionary.

    Columns 0 and 1 are marked continuous and column 2 categorical. The same
    tagging is applied to the training and the test table.

    :param table: numeric table whose dictionary is updated in place
    """
    # Named featureDict rather than ``dict`` so the builtin is not shadowed
    featureDict = table.getDictionary()
    featureDict[0].featureType = features.DAAL_CONTINUOUS
    featureDict[1].featureType = features.DAAL_CONTINUOUS
    featureDict[2].featureType = features.DAAL_CATEGORICAL


def trainModel():
    """Train the decision forest classifier and store it in the global ``model``.

    Also prints the variable importance and out-of-bag error tables taken from
    the training result.
    """
    global model

    trainData, trainGroundTruth = _loadLabeledCsv(trainDatasetFileName, nFeatures)

    # Update the data dictionary with additional information about the features
    _setFeatureTypes(trainData)

    # Create an algorithm object to train the decision forest classification model
    algorithm = training.Batch(nClasses)
    algorithm.parameter.nTrees = nTrees
    algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    algorithm.parameter.featuresPerNode = nFeatures
    algorithm.parameter.varImportance = decision_forest.training.MDI
    algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError

    # Pass the training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)

    # Train the decision forest classification model and retrieve the results of the training algorithm
    trainingResult = algorithm.compute()
    model = trainingResult.get(classifier.training.model)
    printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
    printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")


def testModel():
    """Run prediction on the test data set with the global ``model``.

    Stores the test labels in the global ``testGroundTruth`` and the prediction
    result in the global ``predictionResult``. trainModel() is expected to have
    been called first so that ``model`` is set.
    """
    global testGroundTruth, predictionResult

    testData, testGroundTruth = _loadLabeledCsv(testDatasetFileName, nFeatures)

    # Update the data dictionary with additional information about the features
    _setFeatureTypes(testData)

    # Create algorithm objects for decision forest classification prediction with the default method
    algorithm = prediction.Batch(nClasses)

    # Pass the testing data set and trained model to the algorithm
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, model)

    # Compute prediction results and retrieve algorithm results
    # (Result class from classifier.prediction)
    predictionResult = algorithm.compute()


def printResults():
    """Print the predicted labels followed by the ground-truth labels."""
    printNumericTable(
        predictionResult.get(classifier.prediction.prediction),
        "Decision forest prediction results (first 10 rows):", 10
    )
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)


if __name__ == "__main__":
    trainModel()
    testModel()
    printResults()
相关文章
- 解密昇腾AI处理器--DaVinci架构(总览)
- 解锁云原生 AI 技能|在 Kubernetes 上构建机器学习系统
- AI - AutoKeras - 简介
- Python3实现基于百度AI开放平台和图灵机器人API聊天机器人
- AI:一个20年程序猿的学习资料大全—BAT等面试资料/NECCS大赛资料/一二级建造师/网络编程爬虫等/公务员——只有你不想要的,没有你找不到的
- AI:人工智能的多模态融合模型的简介、发展以及未来趋势
- AI:2019年3月29日教育部最新公布35所大学获批四年制【人工智能】本科专业
- 又一重要进展发布!OpenMMLab算法仓支持昇腾AI训练加速
- 华为鲁勇:5G+云+AI三大核心引擎将驱动广州数字经济发展
- 华为云新加坡峰会发布Cloud&AI创新实验室,四大核心优势助力智能化升级
- 带你从0到1开发AI图像分类应用
- 昇腾AI处理器软件栈--张量加速引擎(TBE)
- 【AI人工智能】AI绘画能取代设计师?
- 【大数据 AI】视觉ChatGPT来了,微软发布,代码已开源
- TensorFlowX.Y核心基础与AI模型设计06:TF2模型的输入输出思想、模型保存为单文件多文件、加载模型输入参数