

Start the Cloudera server
-------------------------

# Start the cloudera/quickstart container with an interactive terminal, in privileged mode, publishing the listed ports.
docker run -it --privileged -P --hostname=quickstart.cloudera -p 8888:8888 -p 8080:80 -p 50070:50070 -p 8088:8088 -p 50075:50075 -p 8032:8032 -p 8042:8042 -p 19888:19888 -p 10000:10000 cloudera/quickstart /usr/bin/docker-quickstart



Copy the CSV file to the Docker container
-----------------------------------------
# Template: replace <containerid> with the ID of the running container (see docker ps).
docker cp E:/MSc_Datascience/BigDataHadoop/Slides/hive/Online_Retail.csv <containerid>:/tmp/Online_Retail.csv
# Example with a concrete container ID. Forward slashes are used throughout the
# host path so the command also works in shells that treat a backslash as an escape.
docker cp C:/Users/suchi/OneDrive/MSc_Datascience/BigDataHadoop/Slides/hive/Online_Retail.csv 21f3c6b92ab3:/tmp/Online_Retail.csv


Get the Hive command prompt
---------------------------
In the container's shell, type hive and press Enter.


Create Database Statement
----------------------------

-- Create the user_db database. IF NOT EXISTS keeps the statement re-runnable,
-- matching the CREATE TABLE IF NOT EXISTS statements used for the tables.
-- NOTE(review): no USE user_db statement appears in these notes, so the tables
-- below are created in whichever database is current - confirm that is intended.
CREATE SCHEMA IF NOT EXISTS user_db;


-- List all databases to confirm that user_db is present.
SHOW DATABASES;



Create tables
-------------

-- Final table for the Online Retail data set, with InvoiceDate typed as TIMESTAMP
-- and CustomerID typed as INT. Stored as comma-delimited text, one record per line.
CREATE TABLE IF NOT EXISTS Online_Retail (
    InvoiceNo   STRING,
    StockCode   STRING,
    Description STRING,
    Quantity    INT,
    InvoiceDate TIMESTAMP,
    UnitPrice   DOUBLE,
    CustomerID  INT,
    Country     STRING
)
COMMENT 'Online Retail Data Set'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;

-- Staging table that receives the raw CSV. InvoiceDate and CustomerID are kept
-- as STRING here and converted when the rows are copied into Online_Retail.
-- NOTE(review): if the CSV starts with a header line it will be loaded as a data
-- row - confirm, and consider TBLPROPERTIES ('skip.header.line.count'='1').
-- NOTE(review): plain comma splitting does not honour quoted fields - verify
-- that no Description value contains a comma.
CREATE TABLE IF NOT EXISTS tmp (
    InvoiceNo   STRING,
    StockCode   STRING,
    Description STRING,
    Quantity    INT,
    InvoiceDate STRING,
    UnitPrice   DOUBLE,
    CustomerID  STRING,
    Country     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;


Load Data
------------
-- Replace the contents of tmp with the CSV that was copied to /tmp in the
-- container. LOCAL makes Hive read the path from the local file system.
LOAD DATA
    LOCAL INPATH '/tmp/Online_Retail.csv'
    OVERWRITE
    INTO TABLE tmp;

-- Copy the staged rows into Online_Retail, converting the 'dd-MM-yyyy HH:mm'
-- text in tmp.InvoiceDate to a TIMESTAMP and tmp.CustomerID to an INT.
-- INSERT OVERWRITE (instead of INSERT INTO) makes the step re-runnable: running
-- it again replaces the rows rather than appending a duplicate copy, matching
-- the OVERWRITE used when loading tmp.
-- The CASTs spell out the conversions to the target column types.
INSERT OVERWRITE TABLE Online_Retail
SELECT
    InvoiceNo,
    StockCode,
    Description,
    Quantity,
    CAST(from_unixtime(unix_timestamp(InvoiceDate, 'dd-MM-yyyy HH:mm')) AS TIMESTAMP),
    UnitPrice,
    CAST(CustomerID AS INT),
    Country
FROM tmp;



Select a few rows
-----------------
 -- Preview five raw rows of the staging table.
 SELECT *
 FROM tmp
 LIMIT 5;

 -- Preview five converted rows of the final table.
 SELECT *
 FROM Online_Retail
 LIMIT 5;

 -- Show the raw InvoiceDate text for five staged rows.
 SELECT InvoiceDate
 FROM tmp
 LIMIT 5;
 
 
Group By and filter operations
------------------------------
 
 -- Count the rows recorded for each country.
 SELECT
     Country,
     count(*)
 FROM Online_Retail
 GROUP BY Country;
 
 
 
 -- Rows from the United Kingdom whose unit price is above 1000.
 SELECT *
 FROM Online_Retail
 WHERE Country = 'United Kingdom'
   AND UnitPrice > 1000;
 
 
 
Drop table
-------------------
 -- Clean up: remove both tables. IF EXISTS keeps each statement safe to run
 -- when the table has already been dropped.
 DROP TABLE IF EXISTS Online_Retail;

 DROP TABLE IF EXISTS tmp;
 
 

 
 -- Preview how the raw 'dd-MM-yyyy HH:mm' text converts to a timestamp string.
 -- Reads from tmp, where InvoiceDate is still a STRING. Online_Retail already
 -- stores InvoiceDate as TIMESTAMP, so the parse pattern does not apply there.
 -- Run this before the Drop table section, which removes tmp.
 SELECT from_unixtime(unix_timestamp(InvoiceDate, 'dd-MM-yyyy HH:mm'))
 FROM tmp
 LIMIT 5;