LDA主题模型的java代码实现详解大数据
public static void main(String[] args) throws IOException { // TODO Auto-generated method stub String resultPath = "ldaResult/"; String parameterFile= "source/lda_parameters.txt"; modelparameters ldaparameters = new modelparameters(); getParametersFromFile(ldaparameters, parameterFile); String dirPath = "LDATrain/"; Documents docSet = new Documents(); docSet.readDocs(dirPath); System.out.println("wordMap size " + docSet.termToIndexMap.size()); FileUtil.mkdir(resultPath); LdaModel model = new LdaModel(ldaparameters); System.out.println("1 Initialize the model ..."); model.initializeModel(docSet); System.out.println("2 Learning and Saving the model ..."); model.inferenceModel(docSet); System.out.println("3 Output the final model ..."); model.saveIteratedModel(ldaparameters.iteration, docSet); System.out.println("Done!"); //预测新文本 String messStr = "好消息!!薇町婚纱造型推出老带新活动啦!已在本店预定的新娘推荐新顾客来本店,定单后即赠送新、老顾客各一支价值58元定妆隔离水(在婚礼当"; Document doc = new Document(messStr); int topicIndex = model.predictNewSampleTopic(doc); Set Word wordSet = model.getWordByTopic(topicIndex); FileUtil.writeKeyWordFile("ldaWords/comparedkeyWords.doc", new ArrayList Word (wordSet));
public class LdaModel { int [][] doc;//word index array int V, K, M;//vocabulary size, topic number, document number int [][] z;//topic label array float alpha; //doc-topic dirichlet prior parameter float beta; //topic-word dirichlet prior parameter int [][] nmk;//given document m, count times of topic k. M*K int [][] nkt;//given topic k, count times of term t. K*V int [] nmkSum;//Sum for each row in nmk int [] nktSum;//Sum for each row in nkt double [][] phi;//Parameters for topic-word distribution K*V double [][] theta;//Parameters for doc-topic distribution M*K int iterations;//Times of iterations int saveStep;//The number of iterations between two saving int beginSaveIters;//Begin save model at this iteration Map String, Integer wordIndexMap; Documents docSet; public LdaModel(LdaGibbsSampling.modelparameters modelparam) { // TODO Auto-generated constructor stub alpha = modelparam.alpha; beta = modelparam.beta; iterations = modelparam.iteration; K = modelparam.topicNum; saveStep = modelparam.saveStep; beginSaveIters = modelparam.beginSaveIters; public void initializeModel(Documents docSet) { this.docSet = docSet; // TODO Auto-generated method stub M = docSet.docs.size(); V = docSet.termToIndexMap.size(); nmk = new int [M][K]; nkt = new int[K][V]; nmkSum = new int[M]; nktSum = new int[K]; phi = new double[K][V]; theta = new double[M][K]; this.wordIndexMap = new HashMap String, Integer //initialize documents index array doc = new int[M][]; for(int m = 0; m m++){ //Notice the limit of memory int N = docSet.docs.get(m).docWords.length; doc[m] = new int[N]; for(int n = 0; n n++){ doc[m][n] = docSet.docs.get(m).docWords[n]; //initialize topic lable z for each word z = new int[M][]; for(int m = 0; m m++){ int N = docSet.docs.get(m).docWords.length; z[m] = new int[N]; for(int n = 0; n n++){ //随机初始化! 
int initTopic = (int)(Math.random() * K);// From 0 to K - 1 z[m][n] = initTopic; //number of words in doc m assigned to topic initTopic add 1 nmk[m][initTopic]++; //number of terms doc[m][n] assigned to topic initTopic add 1 nkt[initTopic][doc[m][n]]++; // total number of words assigned to topic initTopic add 1 nktSum[initTopic]++; // total number of words in document m is N nmkSum[m] = N; public void inferenceModel(Documents docSet) throws IOException { // TODO Auto-generated method stub if(iterations saveStep + beginSaveIters){ System.err.println("Error: the number of iterations should be larger than " + (saveStep + beginSaveIters)); System.exit(0); for(int i = 0; i iterations; i++){ System.out.println("Iteration " + i); if((i = beginSaveIters) (((i - beginSaveIters) % saveStep) == 0)){ //Saving the model System.out.println("Saving model at iteration " + i +" ... "); //Firstly update parameters updateEstimatedParameters(); //Secondly print model variables saveIteratedModel(i, docSet); //Use Gibbs Sampling to update z[][] for(int m = 0; m m++){ int N = docSet.docs.get(m).docWords.length; for(int n = 0; n n++){ // Sample from p(z_i|z_-i, w) int newTopic = sampleTopicZ(m, n); z[m][n] = newTopic; private void updateEstimatedParameters() { // TODO Auto-generated method stub for(int k = 0; k k++){ for(int t = 0; t t++){ phi[k][t] = (nkt[k][t] + beta) / (nktSum[k] + V * beta); for(int m = 0; m m++){ for(int k = 0; k k++){ theta[m][k] = (nmk[m][k] + alpha) / (nmkSum[m] + K * alpha); private int sampleTopicZ(int m, int n) { // TODO Auto-generated method stub // Sample from p(z_i|z_-i, w) using Gibbs upde rule //Remove topic label for w_{m,n} int oldTopic = z[m][n]; nmk[m][oldTopic]--; nkt[oldTopic][doc[m][n]]--; nmkSum[m]--; nktSum[oldTopic]--; //Compute p(z_i = k|z_-i, w) double [] p = new double[K]; for(int k = 0; k k++){ p[k] = (nkt[k][doc[m][n]] + beta) / (nktSum[k] + V * beta) * (nmk[m][k] + alpha) / (nmkSum[m] + K * alpha); //Sample a new topic label for w_{m, n} 
like roulette //Compute cumulated probability for p for(int k = 1; k k++){ p[k] += p[k - 1]; double u = Math.random() * p[K - 1]; //p[] is unnormalised int newTopic; for(newTopic = 0; newTopic newTopic++){ if(u p[newTopic]){ break; //Add new topic label for w_{m, n} nmk[m][newTopic]++; nkt[newTopic][doc[m][n]]++; nmkSum[m]++; nktSum[newTopic]++; return newTopic; /** * 对给定的待预测的文本,将其分词结果的单词与训练集的单词的索引对应上 * @param predictWordSet * @return public Map String,String matchTermIndex(Set Word predictWordSet){ /** * key:word的内容 value:文档index-单词index,如“1-2” Map String,String wordIndexMap = new HashMap String, String for(Word word : predictWordSet){ String content = word.getContent(); String indexStr = getTermIndex(content); wordIndexMap.put(content, indexStr); return wordIndexMap; /** * 对于给定单词,找到该单词在训练集中对应的文档和单词索引 * @param content * @return public String getTermIndex(String content){ for(Integer m : docSet.getDocWordsList().keySet()){ LinkedList String list = docSet.getDocWordsList().get(m); for(int i = 0; i list.size(); i ++){ if(list.get(i).equals(content)) return m+"-"+i; return "none"; /** * 在训练完LDA模型后,根据给定的主题索引set,得到每个主题的topNum单词列表集合 * @param topicIndexSet * @param topNum * @return public Set Word getWordByTopics(Set Integer topicIndexSet, int topNum){ Set Word wordSet = new HashSet Word for(Integer indexT : topicIndexSet){ List Integer tWordsIndexArray = new ArrayList Integer for(int j = 0; j j++) tWordsIndexArray.add(new Integer(j)); Collections.sort(tWordsIndexArray, new LdaModel.TwordsComparable(phi[indexT])); for(int t = 0; t topNum; t++){ String content = docSet.indexToTermMap.get(tWordsIndexArray.get(t)); Word word = new Word(content); if(SegmentWordsResult.getStopWordsSet().contains(content)|| ProcessKeyWords.remove(word) || ProcessKeyWords.isMeaninglessWord(content)) continue; wordSet.add(word); return wordSet; public Set Word getWordByTopic(Integer topicIndex){ Set Word wordSet = new HashSet Word List Integer tWordsIndexArray = new ArrayList Integer for(int j = 
0; j j++){ tWordsIndexArray.add(new Integer(j)); Collections.sort(tWordsIndexArray, new LdaModel.TwordsComparable(phi[topicIndex])); for(int t = 0; t t++){ String content = docSet.indexToTermMap.get(tWordsIndexArray.get(t)); Word word = new Word(content); word.setWeight(phi[topicIndex][tWordsIndexArray.get(t)]); if(SegmentWordsResult.getStopWordsSet().contains(content)|| ProcessKeyWords.remove(word) || ProcessKeyWords.isMeaninglessWord(content)) continue; if(phi[topicIndex][tWordsIndexArray.get(t)] = 0.0) continue; wordSet.add(word); return wordSet;
double topicProb[] = new double[K]; Map String,String wordIndexMap = matchTermIndex(doc.getWordMap().keySet()); int predict_v = doc.getWordCount(); int [][] predict_nkt;//given topic k, count times of term t. K*V double [][] predict_phi;//Parameters for topic-word distribution K*V int [] predict_z;//topic label array int [] predict_nk;//该文档覆盖的主题索引,值为该文档覆盖指定主题的次数 predict_nkt = new int[K][predict_v]; predict_phi = new double[K][predict_v]; predict_z = new int[predict_v]; predict_nk = new int[K]; for(int index = 0; index predict_v; index++){ String content = doc.getWordsList().get(index); String indexStr = wordIndexMap.get(content); if(indexStr.indexOf("-") == -1) continue; int m = Integer.valueOf(indexStr.substring(0, indexStr.indexOf("-"))); int n = Integer.valueOf(indexStr.substring(indexStr.indexOf("-")+1)); // Sample from p(z_i|z_-i, w) int newTopic = predictSampleTopicZ(m, n); predict_z[index] = newTopic; predict_nkt[newTopic][index] ++; predict_nk[newTopic] ++; for(int k = 0; k k++){ topicProb[k] = (predict_nk[k] + alpha) / (predict_v + K * alpha); return getTopic(topicProb); public int getTopic(double[] topicProp){ int maxIndex = 0; double maxProp = topicProp[0]; Set String words = new HashSet String for(int k = 1; k k ++){ if(maxProp topicProp[k]){ maxProp = topicProp[k]; maxIndex = k; return maxIndex; public int predictSampleTopicZ(int m, int n){ // TODO Auto-generated method stub // Sample from p(z_i|z_-i, w) using Gibbs upde rule //Compute p(z_i = k|z_-i, w) double [] p = new double[K]; for(int k = 0; k k++){ p[k] = (nkt[k][doc[m][n]] + beta) / (nktSum[k] + V * beta) * (nmk[m][k] + alpha) / (nmkSum[m] + K * alpha); //Sample a new topic label for w_{m, n} like roulette //Compute cumulated probability for p for(int k = 1; k k++){ p[k] += p[k - 1]; double u = Math.random() * p[K - 1]; //p[] is unnormalised int newTopic; for(newTopic = 0; newTopic newTopic++){ if(u p[newTopic]){ break; //Add new topic label for w_{m, n} return newTopic; public void 
saveIteratedModel(int iters, Documents docSet) throws IOException { // TODO Auto-generated method stub //lda.params lda.phi lda.theta lda.tassign lda.twords //lda.params String resultPath = "ldaResult/"; String modelName = "lda_" + iters; ArrayList String lines = new ArrayList String lines.add("alpha = " + alpha); lines.add("beta = " + beta); lines.add("topicNum = " + K); lines.add("docNum = " + M); lines.add("termNum = " + V); lines.add("iterations = " + iterations); lines.add("saveStep = " + saveStep); lines.add("beginSaveIters = " + beginSaveIters); FileUtil.writeLines(resultPath + modelName + ".params", lines); //lda.phi K*V BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".phi")); for (int i = 0; i i++){ for (int j = 0; j j++){ writer.write(phi[i][j] + "/t"); writer.write("/n"); writer.close(); //lda.theta M*K writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".theta")); for(int i = 0; i i++){ for(int j = 0; j j++){ writer.write(theta[i][j] + "/t"); writer.write("/n"); writer.close(); //lda.tassign writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".tassign")); for(int m = 0; m m++){ for(int n = 0; n doc[m].length; n++){ writer.write(doc[m][n] + ":" + z[m][n] + "/t"); writer.write("/n"); writer.close(); List Word appendwords = new ArrayList Word //lda.twords phi[][] K*V writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".twords")); int topNum = 10; //Find the top 20 topic words in each topic for(int i = 0; i i++){ List Integer tWordsIndexArray = new ArrayList Integer for(int j = 0; j j++){ tWordsIndexArray.add(new Integer(j)); Collections.sort(tWordsIndexArray, new LdaModel.TwordsComparable(phi[i])); writer.write("topic " + i + "/t:/t"); for(int t = 0; t topNum; t++){ writer.write(docSet.indexToTermMap.get(tWordsIndexArray.get(t)) + " " + phi[i][tWordsIndexArray.get(t)] + "/t"); Word word = new Word(docSet.indexToTermMap.get(tWordsIndexArray.get(t))); 
word.setWeight(phi[i][tWordsIndexArray.get(t)]); appendwords.add(word); writer.write("/n"); writer.close(); //lda.words writer = new BufferedWriter(new FileWriter(resultPath + modelName + ".words")); for(Word word : appendwords){ if(word.getContent().trim().equals("")) continue; writer.write(word.getContent()+"/t"+word.getWeight()+"/n"); writer.close(); public class TwordsComparable implements Comparator Integer { public double [] sortProb; // Store probability of each word in topic k public TwordsComparable (double[] sortProb){ this.sortProb = sortProb; @Override public int compare(Integer o1, Integer o2) { // TODO Auto-generated method stub //Sort topic word index according to the probability of each word in topic k if(sortProb[o1] sortProb[o2]) return -1; else if(sortProb[o1] sortProb[o2]) return 1; else return 0; public static void main(String[] args){ }
public class Documents { ArrayList Document docs; Map String, Integer termToIndexMap; ArrayList String indexToTermMap; Map String,Integer termCountMap; private static NLPIRUtil npr = new NLPIRUtil(); private static Set String stopWordsSet = SegmentWordsResult.getStopWordsSet(); private Map Word,Integer wordDocMap; private Map Integer, LinkedList String docWordsList;//key:第i篇文档,value:单词列表,为了与lda模型中的doc[m][n]的索引对应
termCountMap = new HashMap String, Integer this.wordDocMap = new HashMap Word, Integer this.docWordsList = new HashMap Integer, LinkedList String (); public Map String, Integer getTermCountMap() { return termCountMap;
public void setTermCountMap(Map String, Integer termCountMap) { this.termCountMap = termCountMap; public Map Word, Integer getWordDocMap() { return wordDocMap;
public void setWordDocMap(Map Word, Integer wordDocMap) { this.wordDocMap = wordDocMap;
public void setDocWordsList(Map Integer, LinkedList String docWordsList) { this.docWordsList = docWordsList;
for(File docFile : new File(docsPath).listFiles()){ Document doc = new Document(docFile.getAbsolutePath(), termToIndexMap, indexToTermMap, termCountMap); docs.add(doc); for(Word word : doc.getWordMap().keySet()){ if(this.wordDocMap.containsKey(word)) this.wordDocMap.put(word, this.wordDocMap.get(word)); else this.wordDocMap.put(word, 1); this.docWordsList.put(index++, doc.getWordsList());
private static NLPIRUtil npr = new NLPIRUtil(); private static Set String stopWordsSet = SegmentWordsResult.getStopWordsSet(); private String docName; int[] docWords; private int wordCount; private Map Word, Integer wordMap ; private LinkedList String wordsList;//为了和docWords的索引对应,即单词内容对应索引值 public int getWordCount() { return wordCount; public void setWordCount(int wordCount) { this.wordCount = wordCount; public Map Word, Integer getWordMap() { return wordMap; public void setWordMap(Map Word, Integer wordMap) { this.wordMap = wordMap; public LinkedList String getWordsList() { return wordsList; public void setWordsList(LinkedList String wordsList) { this.wordsList = wordsList; public Document(String docContent){ this.wordMap = new HashMap Word, Integer this.wordsList = new LinkedList String String splitResult = npr.NLPIR_ParagraphProcess(ProcessMessage.dealWithSentence(docContent), 0); String[] wordsArray = splitResult.split(" "); this.docWords = new int[wordsArray.length]; int index = 0; //Transfer word to index for(String str : wordsArray){ String content = ProcessMessage.dealSpecialString(str); Word word = new Word(content); if(ProcessKeyWords.remove(word) || stopWordsSet.contains(content)) continue; else if(content.length() = 1 || RegexMatch.specialMatch(content)) continue; this.wordCount ++; if(!wordMap.containsKey(content)){ int newIndex = wordMap.size(); wordMap.put(word, 1); docWords[index++] = newIndex; }else{ wordMap.put(word, wordMap.get(word)+1); docWords[index++] = wordMap.get(content); this.wordsList.add(content); public Document(String filePath,Map String, Integer termToIndexMap, ArrayList String indexToTermMap, Map String, Integer termCountMap){ this(FileUtil.readContent(filePath)); this.docName = filePath; this.wordMap = new HashMap Word, Integer this.wordsList = new LinkedList String //Read file and initialize word index array String docContent = FileUtil.readContent(docName); String splitResult = npr.NLPIR_ParagraphProcess(docContent, 0); String[] 
wordsArray = splitResult.split(" "); this.docWords = new int[wordsArray.length]; int index = 0; //Transfer word to index for(String str : wordsArray){ String content = ProcessMessage.dealSpecialString(str); Word word = new Word(content); if(ProcessKeyWords.remove(word) || stopWordsSet.contains(content)) continue; else if(ProcessKeyWords.isMeaninglessWord(content)) continue; this.wordCount ++; if(!termToIndexMap.containsKey(content)){ int newIndex = termToIndexMap.size(); termToIndexMap.put(str, newIndex); indexToTermMap.add(str); termCountMap.put(str, new Integer(1)); docWords[index++] = newIndex; }else{ termCountMap.put(content, termCountMap.get(content) + 1); docWords[index++] = termToIndexMap.get(content); this.wordsList.add(content); if(wordMap.containsKey(word)) wordMap.put(word, wordMap.get(word)+1); else wordMap.put(word, 1); public boolean isNoiseWord(String string) { // TODO Auto-generated method stub string = string.toLowerCase().trim(); Pattern MY_PATTERN = Pattern.compile(".*[a-zA-Z]+.*"); Matcher m = MY_PATTERN.matcher(string); // filter @xxx and URL if(string.matches(".*www//..*") || string.matches(".*//.com.*") || string.matches(".*http:.*") ) return true; else return false; }
上述代码中,LdaModel类包含了预测新样本的方法predictNewSampleTopic,返回的是该样本概率最大的主题索引;LdaGibbsSampling类则是训练LDA主题模型的流程。

主题-单词分布的部分结果如下:
topic 0 : ⒐ 0.0029859442729502916 住宅 0.002257665153592825 制造 0.002257665153592825 行为 0.002257665153592825 收益 0.0015293860342353582 西北 0.0015293860342353582 红星 0.0015293860342353582 轻松 0.0015293860342353582 小商品 0.0015293860342353582 搜房网 0.0015293860342353582
topic 1
:
贵宾 0.0030435749795287848
商城 0.0023012396413832903
太平洋保险 0.0015589043032377958
建设 0.0015589043032377958
储蓄 0.0015589043032377958
周四 0.0015589043032377958
完成 0.0015589043032377958
区内 0.0015589043032377958
王志钢 0.0015589043032377958
872944 0.0015589043032377958
topic 2
:
油田 0.0017282527405768633
雀巢 0.0017282527405768633
金千 0.0017282527405768633
山腰 9.052753448486328E-4
代办 9.052753448486328E-4
洋房 9.052753448486328E-4
月饼 9.052753448486328E-4
三星 9.052753448486328E-4
集成 9.052753448486328E-4
大桥 9.052753448486328E-4
topic 3
:
美容 0.0016053818399086595
疯狂 0.0016053818399086595
获取 0.0016053818399086595
名牌 0.0016053818399086595
风神 0.0016053818399086595
小额 0.0016053818399086595
璀璨 0.0016053818399086595
一千 0.0016053818399086595
专注 0.0016053818399086595
发放 0.0016053818399086595
topic 4
:
焦点 0.002957939635962248
搜狐 0.002236490836367011
房屋 0.002236490836367011
玉兰 0.002236490836367011
短期 0.002236490836367011
理疗 0.002236490836367011
4001080000 0.0015150421531870961
命题 0.0015150421531870961
公开 0.0015150421531870961
乐器 0.0015150421531870961
topic 5
:
实验 0.0023698494769632816
每块 0.0023698494769632816
收费 0.0023698494769632816
博览 0.0016053818399086595
重新 0.0016053818399086595
任意 0.0016053818399086595
借款 0.0016053818399086595
保底 0.0016053818399086595
预期 0.0016053818399086595
初二 0.0016053818399086595
topic 6
:
宗旨 0.0016625761054456234
陈勇军 0.0016625761054456234
拨打 0.0016625761054456234
家人 0.0016625761054456234
工业 0.0016625761054456234
百货店 0.0016625761054456234
实业 0.0016625761054456234
6222024000068818521 0.0016625761054456234
18692297994 0.0016625761054456234
13300 0.0016625761054456234
topic 7
:
→ 0.005167018622159958
餐厅 0.00298377126455307
保修 0.00298377126455307
英语 0.0022560220677405596
红 0.0022560220677405596
普通 0.0022560220677405596
学习 0.001528272987343371
龙湖 0.001528272987343371
电大 0.001528272987343371
任意 0.001528272987343371
topic 8
:
登陆 0.0025078877806663513
食宿 0.001698891632258892
急需 0.001698891632258892
建行 0.001698891632258892
葡萄酒 0.001698891632258892
新版 0.001698891632258892
富豪 0.001698891632258892
对比 0.001698891632258892
泥工 0.001698891632258892
相信 8.898956584744155E-4
topic 9
:
体育 0.7940398454666138
活动 0.005577780772000551
优惠 0.0038460372015833855
欢迎 0.003806901630014181
银行 0.0032981408294290304
电话 0.003268789267167449
联系 0.0031611667945981026
公司 0.002769812010228634
地址 0.0024860799312591553
】 0.002339322119951248
topic 10
:
年级 0.0023899467196315527
车主 0.0023899467196315527
过程 0.0016189961461350322
华联 0.0016189961461350322
家电 0.0016189961461350322
大业 0.0016189961461350322
时代 0.0016189961461350322
迪赛尼斯 0.0016189961461350322
稀缺 0.0016189961461350322
稳定 0.0016189961461350322
topic 11
:
利率 0.002570267766714096
知名 0.002570267766714096
南湖 0.0017411491135135293
实现 0.0017411491135135293
立秋 0.0017411491135135293
就读 0.0017411491135135293
罗马 0.0017411491135135293
广电局 0.0017411491135135293
独具 0.0017411491135135293
静候 0.0017411491135135293
topic 12
:
哥哥 0.0029536776710301638
家里 0.0029536776710301638
化妆 0.0029536776710301638
名品 0.0022332684602588415
一 0.0022332684602588415
四川 0.0015128592494875193
二手车 0.0015128592494875193
订购 0.0015128592494875193
多种 0.0015128592494875193
潜力 0.0015128592494875193
topic 13
:
建行 0.002435001078993082
开发商 0.0016495168674737215
美容 0.0016495168674737215
奔驰 0.0016495168674737215
比例 0.0016495168674737215
英伦 0.0016495168674737215
开通 0.0016495168674737215
开班 0.0016495168674737215
打开 0.0016495168674737215
英国 0.0016495168674737215
topic 14
:
增值 0.002355444012209773
[验] 0.002355444012209773
公开 0.0015956234419718385
打印机 0.0015956234419718385
家中 0.0015956234419718385
宾馆 0.0015956234419718385
12000 0.0015956234419718385
渠道 0.0015956234419718385
租赁 0.0015956234419718385
无效 0.0015956234419718385
topic 15
:
自由 0.0024857670068740845
巴拉巴 0.0024857670068740845
丰 0.0024857670068740845
朝阳 0.001683906652033329
家人 0.001683906652033329
84725588 0.001683906652033329
老弟 0.001683906652033329
商住 0.001683906652033329
县委 0.001683906652033329
德国 8.820463554002345E-4
topic 16
:
¥10亿 0.002975110663101077
楼下 0.002249473938718438
感恩 0.002249473938718438
独栋 0.002249473938718438
前来 0.0015238370979204774
手机 0.0015238370979204774
申请 0.0015238370979204774
乐 0.0015238370979204774
考点 0.0015238370979204774
3008300 0.0015238370979204774
topic 17
:
批发 0.00239548715762794
总监 0.0016227493761107326
车子 0.0016227493761107326
饭店 0.0016227493761107326
伙伴 0.0016227493761107326
直属 0.0016227493761107326
事后 0.0016227493761107326
翰林 0.0016227493761107326
专题片 0.0016227493761107326
装修 8.500116528011858E-4
topic 18
:
期待 0.0024758405052125454
价 0.0016771822702139616
你好 0.0016771822702139616
决定 0.0016771822702139616
助剂 0.0016771822702139616
人员 0.0016771822702139616
雄伟 0.0016771822702139616
只用 0.0016771822702139616
享受 8.785240934230387E-4
四川 8.785240934230387E-4
topic 19
:
房价 0.003103474387899041
底价 0.0023465293925255537
湖南 0.0015895843971520662
凡 0.0015895843971520662
送礼 0.0015895843971520662
恒大 0.0015895843971520662
一生 0.0015895843971520662
代言人 0.0015895843971520662
专车 0.0015895843971520662
大唐 0.0015895843971520662
topic 20
:
企业主 0.0023483068216592073
讲师 0.0023483068216592073
6222021001055293358 0.0023483068216592073
首发 0.0015907884808257222
认购 0.0015907884808257222
请问 0.0015907884808257222
发布 0.0015907884808257222
中午 0.0015907884808257222
开幕 0.0015907884808257222
⒍ 0.0015907884808257222
topic 21
:
重新 0.002323663793504238
帮忙 0.002323663793504238
85654475 0.002323663793504238
宾 0.002323663793504238
中国 0.0015740948729217052
学历 0.0015740948729217052
" 0.0015740948729217052
温州 0.0015740948729217052
好久 0.0015740948729217052
钢板 0.0015740948729217052
topic 22
:
可口 0.0024103878531605005
形象 0.0024103878531605005
减轻 0.0024103878531605005
高层 0.0016328433994203806
爸爸 0.0016328433994203806
基金 0.0016328433994203806
营业额 0.0016328433994203806
意大利 0.0016328433994203806
正常 0.0016328433994203806
吉智 0.0016328433994203806
topic 23
:
关系 0.0024738647043704987
经营 0.0016758438432589173
美容 0.0016758438432589173
梦想 0.0016758438432589173
喷漆 0.0016758438432589173
肌肤 0.0016758438432589173
刘汉琳 0.0016758438432589173
索菲 0.0016758438432589173
依依 0.0016758438432589173
欢迎 8.778230403549969E-4
topic 24
:
考试 0.0016652129124850035
上班 0.0016652129124850035
金条 0.0016652129124850035
宝 0.0016652129124850035
澳门 0.0016652129124850035
粘贴 0.0016652129124850035
收缩 0.0016652129124850035
18800574923 0.0016652129124850035
豪华 8.722544298507273E-4
老师 8.722544298507273E-4
topic 25
:
长期 0.0030594731215387583
开发区 0.0023132602218538523
低价 0.0023132602218538523
⑥ 0.0023132602218538523
转告 0.0023132602218538523
新 0.0015670472057536244
得到 0.0015670472057536244
[通] 0.0015670472057536244
融资 0.0015670472057536244
万科 0.0015670472057536244
topic 26
:
开发区 0.002339445985853672
石油 0.0015847859904170036
宁波 0.0015847859904170036
更换 0.0015847859904170036
不用 0.0015847859904170036
会议 0.0015847859904170036
初三 0.0015847859904170036
汽车站 0.0015847859904170036
抽空 0.0015847859904170036
实用 0.0015847859904170036
topic 27
:
代办 0.0016745076281949878
代表 0.0016745076281949878
女性 0.0016745076281949878
13825139678 0.0016745076281949878
承担 0.0016745076281949878
影响力 0.0016745076281949878
13934141989 0.0016745076281949878
槐花 0.0016745076281949878
沐 0.0016745076281949878
过敏 0.0016745076281949878
topic 28
:
婚礼 0.00862991251051426
海尔 0.002210969338193536
电影 0.002210969338193536
小乔 0.002210969338193536
15953174009 0.002210969338193536
茶店 0.002210969338193536
7627292. 0.002210969338193536
15985917304 0.002210969338193536
新余 0.001497753313742578
资料 0.001497753313742578
topic 29
:
【 0.021667908877134323
你 0.015670640394091606
您好 0.01555958017706871
光临 0.014560035429894924
尊敬 0.014337914064526558
现在 0.013005186803638935
】 0.012338823638856411
享受 0.010783976875245571
信用 0.009451250545680523
详情 0.007896402850747108
topic 30
:
西吉 0.0024778195656836033
封顶 0.0016785229090601206
押金 0.0016785229090601206
海外 0.0016785229090601206
澜庭 0.0016785229090601206
账户 0.0016785229090601206
原因 0.0016785229090601206
6222021001036927348 0.0016785229090601206
欧莱雅 0.0016785229090601206
推荐 8.792263106442988E-4
pre name= code >
原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/9510.html
分布式文件系统,分布式数据库区块链并行处理(MPP)数据库,数据挖掘开源大数据平台数据中台数据分析数据开发数据治理数据湖数据采集相关文章
- java个人微信消息接收_java接口接收json数据
- Java爱心代码_java怎么敲入代码
- java分布式事务框架_Java分布式事务,及解决方案
- java使用多线程导出excel「建议收藏」
- java出现中文乱码_JAVA中文显示乱码问题「建议收藏」
- java启动器_JAVA基础:Java 启动器如何查找类
- Java实现简单爬虫——爬取疫情数据
- JAVA 新提案:努力简化Hello World,让初学者更好地接受 Java !
- HDFS的Java客户端操作代码(查看HDFS下的文件是否存在)详解大数据
- Java操作MySQL数据库:实现看似不可能的梦想(java连接mysql数据库)
- Linux平台上Java新版本发布(linux发布java)
- Oracle终止支持Java:转向新时代(oracle抛弃java)
- Java 连接 Redis:实现数据快速传输(java连redis)
- 数据解决Redis中 Java的过期数据问题(redisjava过期)
- MySQL与Java的结合:实现强大的数据持久化功能(mysql与java)
- Java调用Redis实现高性能数据存储(java调用redis)
- 使用Java实现Redis数据存储(redis集成java)
- 实现Java实现Redis数据过期管理(redisjava过期)
- 使用Java轻松读取Redis数据 – 一步一步指南!(java读取redis)
- Java与MySQL的无缝衔接:实现高效数据操作(java中使用mysql)
- 数据Java操作MySQL库:获取你所需的数据(java获取mysql)
- 存储过程使用Java语言执行Oracle存储过程(java执行oracle)
- 从Java到Oracle轻松操作数据库(java写oracle库)
- XML到Java代码的数据绑定之对象
- Java利用剪贴板实现交换程序间数据的方法