START CLOUDERA Server
---------------------

docker run --hostname=quickstart.cloudera --privileged=true -t -i  --publish-all=true -p 8888:8888 -p 8080:80 -p 50070:50070 -p 8088:8088  -p 50075:50075 -p 8032:8032 -p 8042:8042 -p 19888:19888 cloudera/quickstart /usr/bin/docker-quickstart
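
The docker cp steps below need the running container's ID (the <containerid> placeholder; c988 and a53d in the examples), which is the first column of:

docker ps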


Copy Text file to docker container
------------------------------------------------
docker cp C:/pig/iris.txt <containerid>:/tmp/iris.txt
docker cp C:/pig/iris.txt c988:/tmp/iris.txt


COPY Text file to HDFS 
----------------------
hadoop fs -mkdir DATA
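#relative HDFS paths like DATA resolve against the user's home directory, /user/root inside the quickstart container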

hadoop fs -copyFromLocal /tmp/iris.txt DATA/iris.txt

#view output
hadoop fs -cat DATA/iris.txt


To get PIG command prompt
-------------------------
Type pig and press Enter to open the Grunt shell.
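
For reference, pig with no arguments starts in MapReduce mode against HDFS; local mode can be handy for quick tests:

pig            #Grunt shell in MapReduce mode (reads and writes HDFS)
pig -x local   #Grunt shell in local mode (reads and writes the local filesystem)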

Load data in PIG 
---------------
Relation_name = LOAD 'Input file path' USING function AS schema;

-- the iris measurements are decimal values, so load them as float (with :int they would come through as null)
flower = LOAD 'DATA/iris.txt' USING PigStorage(',') AS (sepal_length:float, sepal_width:float, petal_length:float, petal_width:float, flower_class:chararray);
DUMP flower;
-- name = FOREACH flower GENERATE flower_class AS id;
-- DUMP name;
B = GROUP flower BY flower_class;
-- DUMP B;
-- after a GROUP, the key is the implicit 'group' field and 'flower' is the bag of grouped rows,
-- so project 'group' for the key (flower.flower_class would emit the whole bag of class names)
Result = FOREACH B GENERATE group AS flower_class, AVG(flower.sepal_length);
DUMP Result;
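
To sanity-check a relation's schema at any step, DESCRIBE prints it; for the grouped relation B above it reports something along the lines of:

DESCRIBE B;
-- B: {group: chararray, flower: {(sepal_length: float, sepal_width: float, petal_length: float, petal_width: float, flower_class: chararray)}}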



USER DEFINED FUNCTION
--------------------
Copy files to docker
------------
docker cp C:/pig/stock.csv <containerid>:/tmp/stock.csv
docker cp C:/pig/stock.csv a53d:/tmp/stock.csv
docker cp C:/pig/pig_UDF.py <containerid>:/tmp/pig_UDF.py
docker cp C:/pig/pig_UDF.py a53d:/tmp/pig_UDF.py
docker cp C:/pig/piggybank-0.15.0.jar a53d:/tmp/piggybank-0.15.0.jar


copy files to HDFS
------------------
hadoop fs -mkdir -p DATA    #-p: no error if DATA already exists from the earlier section
hadoop fs -copyFromLocal /tmp/stock.csv DATA/stock.csv
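
#verify the upload
hadoop fs -ls DATA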

PIG scripts - execute in pig interface
--------------------------------------
REGISTER '/tmp/pig_UDF.py' USING jython AS myudfs;
REGISTER '/tmp/piggybank-0.15.0.jar';

-- prices are decimal values, so declare them as double rather than int (with :int they would load as null)
records = LOAD 'DATA/stock.csv' USING org.apache.pig.piggybank.storage.CSVExcelStorage(',', 'NO_MULTILINE', 'UNIX', 'SKIP_INPUT_HEADER') AS
(Date:datetime, AdjClose:double, Close:double, High:double, Low:double, Open:double, Volume:long);
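-- CSVExcelStorage arguments: field delimiter, multiline handling (NO_MULTILINE), line-ending convention (UNIX), and SKIP_INPUT_HEADER to drop the CSV header row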

record = FOREACH records GENERATE Volume, Close, GetYear(Date) AS Year;

yearData = GROUP record BY Year;   

vol_wt_avg_price = FOREACH yearData GENERATE myudfs.calVWAP(record.Year, record.Volume, record.Close);
-- DUMP vol_wt_avg_price;
STORE vol_wt_avg_price INTO '/user/root/PRICE';

hadoop fs -cat /user/root/PRICE/part-r-00000
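

pig_UDF.py sketch
-----------------
The notes register pig_UDF.py but never show its contents. Below is a minimal Jython sketch of what a calVWAP UDF could look like, assuming it receives the three projected bags and computes the yearly volume-weighted average price, VWAP = sum(Close * Volume) / sum(Volume). The argument handling and output schema here are assumptions, not the original file.

# pig_UDF.py -- hypothetical sketch, not the original file
# The outputSchema decorator is injected by Pig's Jython engine when the
# script is REGISTERed with "USING jython", so no import is required.

@outputSchema('result:(year:int, vwap:double)')
def calVWAP(years, volumes, closes):
    # Pig hands each projected bag to Jython as a list of single-field tuples,
    # e.g. years = [(2015,), (2015,), ...]
    year = years[0][0]               # every tuple in the bag shares the grouped year
    total_volume = 0.0
    weighted_sum = 0.0
    for vol, close in zip(volumes, closes):
        v, c = vol[0], close[0]
        if v is None or c is None:   # skip rows with missing fields
            continue
        total_volume += v
        weighted_sum += v * c
    if total_volume == 0:
        return None
    return (year, weighted_sum / total_volume)

With this shape each output record is one (year, vwap) tuple, so the STORE above writes one line per year under /user/root/PRICE.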