- Big
- cat hivetest.txt
- cp hivetest.txt pigtest.txt
- hdfs dfs -mkdir pigpractice
- hdfs dfs -put pigtest.txt pigpractice
- —
- hdfs dfs -ls pigpractice
- —
- pig
- grunt> student = LOAD 'pigpractice/pigtest.txt' USING PigStorage(',')
- as (sid:int,sname:chararray,marks:int,course:chararray);
- ————-
- // displaying the data from a table.
- dump student;
- ————
- // copying the data from pig table to text file into HDFS.
- STORE student INTO 'pigpractice/test'
- >> USING PigStorage('#');
- —
- or
- > STORE student INTO 'pigpractice/test2' USING PigStorage('\t');
- ——————
- // display the resulted files in HDFS.
- oracle: hdfs dfs -ls pigpractice/test
- or
- cloudera
- hdfs dfs -cat pigpractice/test2/part*
- —————
- // copy the text file from hdfs to linux
- hdfs dfs -get pigpractice/test/part-m-00000
- cat part-m-00000
- ————
- //display the student data by grouping on course and store into the coursegroup bag
- grunt> coursegroup = GROUP student BY course;
- grunt> dump coursegroup;
- ——————
- //Display student name and marks with the alias columns studentname and studentmarks from student bag and store into aliascoursedata bag.
- grunt> aliascoursedata = FOREACH student GENERATE sname AS studentname, marks AS studentmarks;
- ——————
- grunt>coursedata = FOREACH student GENERATE sname AS studentname,marks AS aliascoursedata;
- grunt> studmarks = FILTER student BY marks > 90;
- grunt> dump studmarks;
- —————
- //Display student name and marks from student bag, who got more than 85 and store the result rows into bag studentmarks.
- studentmarks = FILTER (FOREACH student GENERATE sname,marks) BY marks > 85;
- ———-
- grunt>student5 = LIMIT student 5;
- grunt>dump student5;
- ————
- //display the student name, course and marks from student bag by grouping on course and store into the groupstud bag.
- grunt> groupstud = GROUP (FOREACH student GENERATE sname,course,marks) BY course;
- grunt> dump groupstud;
- ———
- //Display all the students with the descending order on marks and store into bag studorder.
- grunt> studorder = ORDER student BY marks desc;
- grunt> dump studorder;
- ———
- //Display the student name, course and marks from student bag with the ascending order on marks and store into the studordernew bag.
- grunt> studordernew = ORDER (FOREACH student GENERATE sname,course,marks) BY marks;
- grunt> dump studordernew;
- ————
- //Display all the students by grouping on course by using column index and store into bag temp.
- grunt> describe student;
- student: {sid: int,sname: chararray,marks: int,course: chararray}
- grunt> temp = GROUP student by $3;
- grunt> dump temp;
- ——————
- //Display all the course names and their corresponding highest marks for each course and store into bag temp1.
- grunt>describe temp;
- temp: {group: chararray,student: {(sid: int,sname: chararray,marks: int,course: chararray)}}
- grunt> temp1 = foreach temp generate group as grp, MAX(student.marks);
- grunt> dump temp1;
- ——————
- //Using all aggregation functions store the result into bag groupresult
- grunt> groupresult = foreach temp generate group,MAX(student.marks) as maxmarks,
- MIN(student.marks) as minmarks, SUM(student.marks) as totmarks,
- AVG(student.marks) as avgmarks, COUNT(student) as totalstudents;
- grunt>describe groupresult;
- groupresult: {group: chararray,maxmarks: int,minmarks: int,totmarks: long,avgmarks: double,totalstudents: long}
- grunt> dump groupresult;
- ———
- //cogroup : Grouping the data from two tables with a common column.
- nano
- sudo nano teacher.txt
- hdfs dfs -put teacher.txt pigpractice
[text] 4044
Viewer
*** This page was generated with the meta tag "noindex, nofollow". This happened because you selected this option before saving or the system detected it as spam. This means that this page will never get into the search engines and the search bot will not crawl it. There is nothing to worry about, you can still share it with anyone.
Editor
You can edit this paste and save as new:
File Description
- 4044
- Paste Code
- 05 May-2024
- 3.56 Kb
You can Share it:
Latest Code Pastes