数据分析:ELK【Elasticsearch(实时分布式搜索和分析引擎,用于全文搜索、结构化搜索、分析)、Logstash(功能类似Flume,用于日志采集)、Kibana(数据可视化工具)】
2023-09-27 14:20:41 时间
ELK开源日志框架同数据 https://www.elastic.co
- Elasticsearch 是一个基于JSON的分布式搜索和分析引擎
- Logstash 是动态数据搜集管道,拥有可扩展的插件生态系统
- Kibana 可以让您的数据变得有形有样,是一个可扩展的用户界面
Elasticsearch启动:elk用户下,进入 /usr/local/elk目录执行:bin/elasticsearch
Kibana启动:elk用户下,进入 /usr/local/kibana目录执行:bin/kibana
# List the indices in the cluster.
GET _cat/indices
# List the nodes in the cluster.
GET _cat/nodes
# Search the `atguigu` index with no request body.
# NOTE(review): `atguigu` is not created anywhere in these notes — confirm it exists before running.
GET atguigu/_search
# Create the index `my_index` with an explicit mapping:
# `stu_id` is a keyword, `name` is analyzed text, `sex` is an integer.
# NOTE(review): the `_doc` level under "mappings" is the typed-mapping form — confirm the cluster version accepts it.
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "stu_id": { "type": "keyword" },
        "name": { "type": "text" },
        "sex": { "type": "integer" }
      }
    }
  }
}

# Read the mapping back to check what was stored.
GET my_index/_mapping
# Index a document without naming an id (POST on the type endpoint).
POST my_index/_doc
{
  "stu_id": "1001",
  "name": "张三",
  "sex": 0
}

# Index documents under explicit ids (PUT with the id in the path).
PUT my_index/_doc/001
{
  "stu_id": "1002",
  "name": "lisi",
  "sex": 1
}

PUT my_index/_doc/003
{
  "stu_id": "1003",
  "name": "youzong",
  "sex": 1
}

# Fetch the documents back with a body-less search.
GET my_index/_search
# No mapping is declared for `my_index2` / `my_index3`: the documents are
# indexed directly, so the field types are left to Elasticsearch to infer.
PUT my_index2/_doc/001
{
  "dept_id": "10",
  "dept_name": "sale",
  "loc": 1
}

PUT my_index3/_doc/001
{
  "emp_id": "1001",
  "emp_name": "wangwu",
  "birth": "2020-03-10"
}

PUT my_index3/_doc/002
{
  "emp_id": "1002",
  "emp_name": "haizong",
  "birth": "1995-10-04"
}

# Fetch the `my_index3` documents back.
GET my_index3/_search
# keyword does no tokenization: the whole input comes back as one token.
# The _analyze body takes the input under "text" and the analyzer name under
# "analyzer"; "keyword" is not a body parameter of its own.
GET _analyze
{
  "analyzer": "keyword",
  "text": "我是程序员"
}
# text is tokenized. No analyzer is named here, so the default one is applied.
GET _analyze
{
  "text": "我是程序员"
}

# Same sentence through the IK plugin's `ik_smart` analyzer.
GET _analyze
{
  "analyzer": "ik_smart",
  "text": "我是程序员"
}

# A longer phrase through the IK plugin's `ik_max_word` analyzer.
GET _analyze
{
  "analyzer": "ik_max_word",
  "text": "脚踩电灯泡乒乓球"
}
# Prepare the sample data.
# 1. Mapping: every field is an exact-value type except `favo`, which is
#    analyzed text tokenized with `ik_max_word`.
# NOTE(review): the `_doc` level under "mappings" is the typed-mapping form — confirm the cluster version accepts it.
PUT student
{
  "mappings": {
    "_doc": {
      "properties": {
        "class_id": { "type": "keyword" },
        "name": { "type": "keyword" },
        "age": { "type": "integer" },
        "sex": { "type": "keyword" },
        "score": { "type": "double" },
        "favo": {
          "type": "text",
          "analyzer": "ik_max_word"
        }
      }
    }
  }
}
# 2. Documents. `age` is sent as a JSON number so it matches the integer
#    mapping directly instead of depending on string-to-number coercion.

# Class 191010.
PUT student/_doc/1001
{
  "class_id": "191010",
  "name": "youzong",
  "age": 18,
  "sex": "animal",
  "score": 70.1,
  "favo": "男,炒股钱"
}

PUT student/_doc/1002
{
  "class_id": "191010",
  "name": "banzhang",
  "age": 19,
  "sex": "male",
  "score": 85.5,
  "favo": "youzong"
}

PUT student/_doc/1003
{
  "class_id": "191010",
  "name": "haizong",
  "age": 17,
  "sex": "male",
  "score": 85.3,
  "favo": "胸口碎大石羽毛球"
}

PUT student/_doc/1004
{
  "class_id": "191010",
  "name": "yulaoda",
  "age": 20,
  "sex": "male",
  "score": 99.9,
  "favo": "脚踩电灯泡乒乓球"
}

PUT student/_doc/8888
{
  "class_id": "191010",
  "name": "whx",
  "age": 30,
  "sex": "male",
  "score": 92.9,
  "favo": "电灯泡乒乓球手拿着乒乓球"
}

# Variations of "quick ... fox" used by the phrase-matching queries.
PUT student/_doc/9999
{
  "class_id": "191010",
  "name": "whx",
  "age": 30,
  "sex": "male",
  "score": 92.9,
  "favo": "quick brown fox and quick brown fox"
}

PUT student/_doc/6666
{
  "class_id": "191010",
  "name": "whx",
  "age": 30,
  "sex": "male",
  "score": 92.9,
  "favo": "quick brown fox"
}

PUT student/_doc/5555
{
  "class_id": "191010",
  "name": "whx",
  "age": 30,
  "sex": "male",
  "score": 92.9,
  "favo": "quick fox"
}

PUT student/_doc/3333
{
  "class_id": "191010",
  "name": "whx",
  "age": 30,
  "sex": "male",
  "score": 92.9,
  "favo": "quick and fox"
}

# Class 191125.
PUT student/_doc/1005
{
  "class_id": "191125",
  "name": "白发魔女",
  "age": 18,
  "sex": "female",
  "score": 99.9,
  "favo": "染头发"
}

PUT student/_doc/1006
{
  "class_id": "191125",
  "name": "huihui",
  "age": 21,
  "sex": "male",
  "score": 0,
  "favo": "橄榄球"
}
# See which tokens `ik_max_word` produces for one of the hobby values.
GET _analyze
{
  "analyzer": "ik_max_word",
  "text": "橄榄球"
}

# See how the `standard` analyzer splits an English sentence.
GET _analyze
{
  "analyzer": "standard",
  "text": "you're quick brown fox"
}

# Confirm the indices exist, then list the `student` documents.
GET _cat/indices

GET student/_search
# filter + term: match on the whole field value; the query value is not tokenized.
GET student/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": { "class_id": "191010" }
      }
    }
  }
}
# match: full-text query against the analyzed `favo` field.
GET student/_search
{
  "query": {
    "match": { "favo": "球" }
  }
}
# match_phrase: phrase query on a Chinese hobby.
GET student/_search
{
  "query": {
    "match_phrase": { "favo": "乒乓球" }
  }
}

# match_phrase: phrase query on an English two-word phrase.
GET student/_search
{
  "query": {
    "match_phrase": { "favo": "quick brown" }
  }
}
# Count matches instead of returning them: the same match_phrase query
# sent to the _count endpoint.
GET student/_count
{
  "query": {
    "match_phrase": { "favo": "quick brown" }
  }
}
# Students of class 191010 whose hobby mentions "球":
# a term filter on `class_id` combined with a match on `favo`.
GET student/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": { "class_id": "191010" }
      },
      "must": [
        { "match": { "favo": "球" } }
      ]
    }
  }
}
# fuzzy: query on the keyword field `name` using the value "yulaoda".
GET student/_search
{
  "query": {
    "fuzzy": {
      "name": { "value": "yulaoda" }
    }
  }
}
# SQL equivalent: count(*) group by class_id.
# `size` caps the number of buckets returned at 2.
GET student/_search
{
  "aggs": {
    "count_by_class_id": {
      "terms": { "field": "class_id", "size": 2 }
    }
  }
}
# SQL equivalent: max(age) over all students.
GET student/_search
{
  "aggs": {
    "max_age": {
      "max": { "field": "age" }
    }
  }
}
# Two independent aggregations in one request:
# max(age) over all students, and count(*) group by class_id.
GET student/_search
{
  "aggs": {
    "count_by_class_id": {
      "terms": { "field": "class_id", "size": 2 }
    },
    "max_age": {
      "max": { "field": "age" }
    }
  }
}
# SQL equivalent: max(age), count(*) group by class_id.
# The max aggregation is nested inside the terms buckets, so it is
# computed once per class.
GET student/_search
{
  "aggs": {
    "group_by_class_id": {
      "terms": { "field": "class_id", "size": 10 },
      "aggs": {
        "max_age": {
          "max": { "field": "age" }
        }
      }
    }
  }
}
# Male students whose hobby mentions "球"; over that result set, count the
# students per class and find the oldest age in each class.
# Paging starts at the first hit ("from": 0): these notes index only eleven
# students, so a larger offset such as 20 would return an empty hits list.
GET student/_search
{
  "query": {
    "bool": {
      "filter": {
        "term": { "sex": "male" }
      },
      "must": [
        { "match": { "favo": "球" } }
      ]
    }
  },
  "aggs": {
    "countByClass": {
      "terms": { "field": "class_id", "size": 10 },
      "aggs": {
        "max_age": {
          "max": { "field": "age" }
        }
      }
    }
  },
  "from": 0,
  "size": 10
}
参考资料:
短语精确匹配:Elasticsearch集群短语匹配(match查询)
Elasticsearch - 短语匹配(match_phrase)以及slop参数
match_phrase短语匹配和近似匹配
docker 搭建 ELK
ElasticSearch基本操作