
Start Cloudera server
---------------------

docker run --hostname=quickstart.cloudera --privileged=true -t -i  --publish-all=true -p 8888:8888 -p 8080:80 -p 50070:50070 -p 8088:8088  -p 50075:50075 -p 8032:8032 -p 8042:8042 -p 19888:19888 cloudera/quickstart /usr/bin/docker-quickstart


Copy WordCount.java to the Docker container
--------------------------------------------
docker cp e:/MSc_Datascience/BigDataHadoop/Slides/mapreduce/WordCount.java <containerid>:/tmp/WordCount.java
docker cp e:/MSc_Datascience/BigDataHadoop/Slides/mapreduce/WordCount.java a53d:/tmp/WordCount.java

Copy text files to HDFS
-----------------------
(Run the `docker cp` commands on the host; run the `hadoop fs` commands inside the container.)
hadoop fs -mkdir DATA
docker cp E:/sample1.txt <containerid>:/tmp/sample1.txt
docker cp E:/sample2.txt <containerid>:/tmp/sample2.txt

docker cp E:/sample1.txt a53d:/tmp/sample1.txt
docker cp E:/sample2.txt a53d:/tmp/sample2.txt

hadoop fs -copyFromLocal /tmp/sample1.txt DATA/sample1.txt
hadoop fs -copyFromLocal /tmp/sample2.txt DATA/sample2.txt


Compile WordCount.java and create the jar
-----------------------------------------
(Run these commands inside the container, where /tmp/WordCount.java was copied.)
mkdir -p build
javac -cp /usr/lib/hadoop/*:/usr/lib/hadoop-mapreduce/* /tmp/WordCount.java -d build -Xlint

jar -cvf wordcount.jar -C build/ . 

Execute MapReduce job
---------------------
hadoop jar wordcount.jar WordCount DATA DATA2 