[robot~]$ hadoop fs -du -h /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201901
516.4 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201901/201901.txt
This file was loaded straight from the local Linux filesystem into HDFS; the raw data is 516.4 MB.
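For context, a load like this is typically done with LOAD DATA LOCAL INPATH. A minimal sketch, assuming t_fin_demo is a plain TEXTFILE table partitioned by staits_date; the local path is hypothetical, since the original does not show it:

-- Hypothetical reconstruction of the original load; the local path is assumed.
load data local inpath '/home/finance/software/201901.txt'
overwrite into table t_fin_demo partition(staits_date='201901');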
1. Computing and storing in Hive without compression
--1. Store the computed data with no compression codec
set hive.exec.compress.output=false; --false is the default anyway
insert overwrite table t_fin_demo partition(staits_date='201900')
select
name,
id_type,
idcard,
org,
loan_no,
busi_type,
busi_category,
open_date,
dure_date,
loan_amount,
happen_time,
amout,
due_amt,
stat
from t_fin_demo where staits_date='201901';
2. Check how the files are stored on HDFS with du -h
[finance@master2-dev software]$ hadoop fs -du -h /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201900
271.0 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201900/000000_0
271.0 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201900/000001_0
4.7 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201900/000002_0
3. Job runtime
Total MapReduce CPU Time Spent: 54 seconds 200 msec
Time taken: 36.445 seconds
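Before each run it is worth confirming which settings are actually in effect; in the Hive CLI, set with no assignment prints a property's current value:

-- Inside the Hive CLI: "set" without "=value" echoes the current setting.
set hive.exec.compress.output;
set mapreduce.output.fileoutputformat.compress.codec;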
1. Compressed storage with Gzip
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
insert overwrite table t_fin_demo partition(staits_date='201904')
select
name,
id_type,
idcard,
org,
loan_no,
busi_type,
busi_category,
open_date,
dure_date,
loan_amount,
happen_time,
amout,
due_amt,
stat
from t_fin_demo where staits_date='201901';
2. Check how the files are stored on HDFS with du -h
[finance@master2-dev hadoop]$ hadoop fs -du -h /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201904
75.9 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201904/000000_0.gz
75.9 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201904/000001_0.gz
1.3 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201904/000002_0.gz
3. Job runtime
Total MapReduce CPU Time Spent: 1 minutes 33 seconds 430 msec
OK
Time taken: 62.436 seconds
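One caveat with Gzip: the .gz output is not splittable, so each part file will be read by a single mapper in downstream jobs. To spot-check the compressed contents from the client, you can pipe the file through gzip (a sketch; the row count is arbitrary):

# Stream a Gzip part file out of HDFS, decompress locally, peek at a few rows.
hadoop fs -cat /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201904/000000_0.gz | gzip -d | head -n 5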
1. Compressed storage with LZO (LzoCodec)
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzoCodec;
insert overwrite table t_fin_demo partition(staits_date='201905')
select
name,
id_type,
idcard,
org,
loan_no,
busi_type,
busi_category,
open_date,
dure_date,
loan_amount,
happen_time,
amout,
due_amt,
stat
from t_fin_demo where staits_date='201901';
2. Check how the files are stored on HDFS with du -h
[finance@master2-dev hadoop]$ hadoop fs -du -h /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201905
121.9 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201905/000000_0.lzo_deflate
121.9 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201905/000001_0.lzo_deflate
2.1 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201905/000002_0.lzo_deflate
3. Job runtime
Total MapReduce CPU Time Spent: 58 seconds 700 msec
OK
Time taken: 42.45 seconds
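The .lzo_deflate suffix means LzoCodec wrote raw LZO streams without the lzop container, so the lzop command-line tool cannot open these files. hadoop fs -text can, provided the LZO codec is registered in io.compression.codecs on the client; a sketch:

# -text resolves the codec from the file extension and decompresses on the fly.
hadoop fs -text /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201905/000000_0.lzo_deflate | head -n 5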
1. Compressed storage with lzop (LzopCodec)
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.codec=com.hadoop.compression.lzo.LzopCodec;
insert overwrite table t_fin_demo partition(staits_date='201906')
select
name,
id_type,
idcard,
org,
loan_no,
busi_type,
busi_category,
open_date,
dure_date,
loan_amount,
happen_time,
amout,
due_amt,
stat
from t_fin_demo where staits_date='201901';
2. Check how the files are stored on HDFS with du -h
[finance@master2-dev hadoop]$ hadoop fs -du -h /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201906
121.9 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201906/000000_0.lzo
121.9 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201906/000001_0.lzo
2.1 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201906/000002_0.lzo
3. Job runtime
Total MapReduce CPU Time Spent: 47 seconds 280 msec
OK
Time taken: 34.439 seconds
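Unlike the raw .lzo_deflate output above, the .lzo files written by LzopCodec can be made splittable by building an index next to them. A sketch using the LzoIndexer bundled with hadoop-lzo; the jar location depends on your installation:

# Writes a .lzo.index file beside each .lzo part so MapReduce can split it.
hadoop jar /path/to/hadoop-lzo.jar com.hadoop.compression.lzo.LzoIndexer /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201906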
1. Compressed storage with Bzip2
set hive.exec.compress.output=true;
set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.BZip2Codec;
insert overwrite table t_fin_demo partition(staits_date='201907')
select
name,
id_type,
idcard,
org,
loan_no,
busi_type,
busi_category,
open_date,
dure_date,
loan_amount,
happen_time,
amout,
due_amt,
stat
from t_fin_demo where staits_date='201901';
2. Check how the files are stored on HDFS with du -h
[finance@master2-dev hadoop]$ hadoop fs -du -h /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201907
52.5 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201907/000000_0.bz2
52.5 M /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201907/000001_0.bz2
935.2 K /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201907/000002_0.bz2
3. Job runtime
Total MapReduce CPU Time Spent: 2 minutes 47 seconds 530 msec
OK
Time taken: 96.42 seconds
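To put the five runs side by side, -du -s -h sums each partition in a single call. From the listings above, the totals come to roughly 546.7 M uncompressed, 153.1 M for Gzip, 245.9 M each for LZO and lzop, and 105.9 M for Bzip2:

# One summary line per partition: total output size of each compression test.
hadoop fs -du -s -h \
  /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201900 \
  /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201904 \
  /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201905 \
  /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201906 \
  /user/finance/hive/warehouse/fdm_sor.db/t_fin_demo/staits_date=201907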