大叔经验分享(35)lzo格式支持
建表语句
CREATE EXTERNAL TABLE `my_lzo_table`(`something` string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS INPUTFORMAT
'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
1 lzo
# yum install lzo lzop
手工安装:http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz
2 hadoop-lzo
# wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/hadoop-gpl-packing/hadoop-gpl-packaging-0.6.1-1.x86_64.rpm
# rpm -ivh hadoop-gpl-packaging-0.6.1-1.x86_64.rpm
# ls /opt/hadoopgpl/lib
cdh4.0.1 guava-12.0.jar hadoop-lzo-0.4.17.jar hadoop-lzo.jar pig-0.10.0 pig-0.6.0 pig-0.7.0 pig-0.8.0 protobuf-java-2.4.1.jar slf4j-api-1.5.8.jar slf4j-log4j12-1.5.10.jar yamlbeans-0.9.3.jar
# ls /opt/hadoopgpl/native/Linux-amd64-64/
libgplcompression.a libgplcompression.la libgplcompression.so libgplcompression.so.0 libgplcompression.so.0.0.0 LzoCompressor.lo LzoCompressor.o LzoDecompressor.lo LzoDecompressor.o
手工安装:https://github.com/twitter/hadoop-lzo/
3 报错
1)报错:IOException: No LZO codec found, cannot run.
core-site.xml
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
2)报错:Error: java.io.IOException: cannot find class com.hadoop.mapred.DeprecatedLzoTextInputFormat
hive
# export HADOOP_CLASSPATH=/opt/hadoopgpl/lib/hadoop-lzo.jar
spark
# export SPARK_CLASSPATH=/opt/hadoopgpl/lib/hadoop-lzo.jar
or
# cp /opt/hadoopgpl/lib/hadoop-lzo.jar $SPARK_HOME/jars/
3)报错:IOException:java.lang.RuntimeException: native-lzo library not available
hive
# export JAVA_LIBRARY_PATH=/opt/hadoopgpl/native/Linux-amd64-64/
spark
# export LD_LIBRARY_PATH=/opt/hadoopgpl/native/Linux-amd64-64/
4)mr报错:Error: java.io.IOException: cannot find class com.hadoop.mapred.DeprecatedLzoTextInputFormat
at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getRecordReader(CombineHiveInputFormat.java:689)
at org.apache.hadoop.mapred.MapTask$TrackedRecordReader.<init>(MapTask.java:169)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:429)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
$ cp /opt/hadoopgpl/lib/hadoop-lzo.jar $HADOOP_HOME/share/hadoop/common/lib/
5)mr报错:Caused by: java.lang.RuntimeException: native-lzo library not available
mapred-site.xml
<property>
<name>mapreduce.map.java.opts</name>
<value>-Djava.library.path=/opt/hadoopgpl/native/Linux-amd64-64</value>
</property>
这种改法的缺点是:在 hive 中重新设置 mapreduce.map.java.opts 时会覆盖该配置,因此设置时需要同时带上 -Djava.library.path,例如:
hive> set mapreduce.map.java.opts=-Xmx3072m -Djava.library.path=/opt/hadoopgpl/native/Linux-amd64-64;
另一种改法是
$ cp /opt/hadoopgpl/native/Linux-amd64-64/* $HADOOP_HOME/lib/native/
根据你安装版本的不同,目标目录也有可能是 /usr/lib/hadoop/lib/native
相关文章
- VB键盘鼠标无动作调用程序尝试的案例分享
- 安卓app定制化合规检测软件分享
- 天翼云盘直链管理平台-支持多账号、永久直链、分享目录
- 数据分享|逻辑回归、随机森林、SVM支持向量机预测心脏病风险数据和模型诊断可视化|附代码数据
- 虹科分享|怎么做才可以保护你的数据隐私?
- 技术分享 | MySQL 大表添加唯一索引的总结
- PYTHON银行机器学习:回归、随机森林、KNN近邻、决策树、高斯朴素贝叶斯、支持向量机SVM分析营销活动数据|数据分享|附代码数据
- 大咖Live | 视见医疗科技王少彬:宫颈癌放疗精准临床靶区勾画实战经验分享
- 「MySQL经验分享——我对MySQL的一些见解」(mysql心得)
- SQLTranscation的一些总结分享
- 一个PHP的远程图片抓取函数分享
- c#日志记录帮助类分享
- js实现屏幕自适应局部代码分享