2

【Hive SQL】速查

 2 years ago
source link: https://www.guofei.site/2018/03/01/hive2.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.

【Hive SQL】速查

2018年03月01日

Author: Guofei

文章归类: 1-1-算法平台 ,文章编号: 162


版权声明:本文作者是郭飞。转载随意,但需要标明原文链接,并通知本人
原文链接:https://www.guofei.site/2018/03/01/hive2.html

Edit

建表

1. 建一个普通表

DROP TABLE IF EXISTS app.app_example;
CREATE TABLE app.app_example(
id int comment 'id对应的解释',
col1 int comment '解释1',
col2 int comment '解释2'
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';

2. 建一个分区表

DROP TABLE IF EXISTS app.app_example;
CREATE TABLE app.app_example(
id int comment 'id对应的解释',
col1 int comment '解释1',
col2 int comment '解释2'
)
partitioned by (dt string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
location '/user/guofei/app_example'
stored as orc
TBLPROPERTIES ('author'='guofei');

插入数据

1. 从本地数据插入

load data local inpath 'file_path' into table tbname;
load data local inpath 'file_path' overwrite into table tbname; -- overwrite抹除原数据

LOAD DATA LOCAL INPATH  'file_path' INTO TABLE tbname PARTITION (country='US', state='CA') -- 插入分区表,增量添加,不是覆盖
LOAD DATA LOCAL INPATH  'file_path' OVERWIRTE INTO TABLE tbname PARTITION (country='US', state='CA') -- 插入分区表,带overwirte表示覆盖

2. 从查询插入数据

create  table tb2 as select * from tb1; -- 新建一个表,并以overwrite的方式写入数据
insert overwrite table tb2 select * from tb1; -- 覆盖插入数据
insert into table tb2 select * from tb1; -- 新增插入数据


-- 静态方式插入分区表:
insert overwrite table tmp.tmp_test
partition (country='US',state='OR')
select names from tbname1;

-- 动态方式插入到分区表:
set hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE app.app_example PARTITION(dt)
SELECT col1,col2,sysdate(-1) as dt
FROM
app.example_1
-- 动态方式必须把分区key放在最后,优点是可以同时向多个分区插入数据

-- overwrite表示覆盖插入,去掉overwrite表示增量添加
DROP TABLE IF EXISTS app.app_example;
CREATE TABLE app.app_example
AS
SELECT * FROM app.example_1;

删除分区

ALTER TABLE tablename DROP IF EXISTS PARTITION(year = 2015, month = 10, day = 1);

清除数据

TRUNCATE TABLE tablename;

对列增删改查

Hive修改表名,列名,列注释,表注释,增加列,调整列顺序,属性名等操作

ALTER TABLE name RENAME TO new_name
ALTER TABLE name ADD COLUMNS (col_spec[, col_spec ...])
ALTER TABLE name DROP [COLUMN] column_name
ALTER TABLE name CHANGE column_name new_name new_type
ALTER TABLE name REPLACE COLUMNS (col_spec[, col_spec ...])

重命名表

Rename To…

ALTER TABLE table_name RENAME TO new_table_name;

改变列名/类型/位置/注释

ALTER TABLE table_name
CHANGE COLUMN
col_old_name col_new_name column_type
[COMMENT col_conmment]
[FIRST|AFTER column_name];

用来修改表的列名、数据类型,列注释和列所在的位置 FIRST将列放在第一列,AFTER col_name将列放在col_name后面一列,

alter table tablename1
CHANGE COLUMN
rpt_tag rpt_tag int comment '1新客,2新转老,3新注册,5老客';

增加列

ADD COLUMNS允许用户在当前列的末尾,分区列之前添加新的列

ALTER TABLE table_name ADD
COLUMNS (col_name data_type [CONMMENT col_comment], ...);

select示例

1. 重复数据处理

看看重复数据的样子

select id,count(id)
from tablename
group by id
having count(id)>1

2. 选取前n数据

select * from
(
select row_number() over(partition by id order by col1,col2) as rownum , *
from table
) as temp
where temp.rownum = 1

指的注意的是,如果col1,col2仍有重复的话,可能每次返回的结果都不一样(一个小坑)

3. 抽样查询

select * from tmp.tmp_example tablesample(bucket 3 out of 30 on rand()) s;


-- 以某一列为分桶标准,每次运行结果都一样,实际上等价于 where id%10=0
select * from tmp.tmp_example tablesample(bucket 1 out of 10 on id) s;

-- 前20%行
select * from tmp.tmp_example tablesample(20 percent) s;

其它操作

show databases
use tmp
  1. hive模糊搜索表
    show tables
    show tables like '*name*';
    show table extended like 'test_partition';
    -- 加上 extended 后,会额外显示以下信息:
    -- location,format,columns,partition,文件数,文件大小,最后一次编辑时间和访问时间
    SHOW TBLPROPERTIES [table_name] -- 显示指定表名的TBLPROPERTIES信息(好像show create table [table_name]更全)
    
  2. 查看表结构信息
    desc formatted table_name;
    desc table_name;
    
  3. 查看分区信息
    show partitions table_name;
    

参考资料

https://blog.csdn.net/helloxiaozhe/article/details/80749094
http://blog.csdn.net/mulangren1988/article/details/77509195


您的支持将鼓励我继续创作!

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK