package day0108 import java.text.SimpleDateFormat import org.apache.log4j.{Level, Logger} import org.apache.spark.sql.SparkSession object SparkSqlDemo { def main(args: Array[String]): Unit = { //去除无用INFO Logger.getLogger("org.apache.spark").setLevel(Level.ERROR) Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF) //获取sparksession val spark = SparkSession.builder().master("local").appName("sqlDemo").getOrCreate() //读取数据,表结构映射,生成DF,注册表 import spark.sqlContext.implicits._ //创建学生表 spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Student.csv"). map(_.split(",")) .map(x => Student(x(0),x(1),x(2),x(3),x(4))) .toDF .createOrReplaceTempView("student") //创建成绩表 spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Score.csv"). map(_.split(",")) .map(x => Score(x(0),x(1),x(2))) .toDF .createOrReplaceTempView("score") //创建课程表 spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Course.csv"). map(_.split(",")) .map(x => Course(x(0),x(1),x(2))) .toDF .createOrReplaceTempView("course") //创建教师表 spark.sparkContext.textFile("D:\\testdata\\streaming\\spark_sql_test_data\\Teacher.csv"). map(_.split(",")) .map(x => Teacher(x(0),x(1),x(2),x(3),x(4),x(5))) .toDF .createOrReplaceTempView("teacher") // spark.sql("select * from student").show() // spark.sql("select * from score").show() // spark.sql("select * from course").show() // spark.sql("select * from teacher").show(false) //注意:排序使用String类型按照字典顺序排序,需要进行类型转换 // spark.sql("select degree from score order by int(degree) desc ").show() //查询没门课程的平均成绩;spark-sql对于大小写也是不敏感的 // spark.sql("select cnum,avg(cast(degree as int)) from score group by cnum").show() // spark.sql("select cnum,avg(degree) from SCORE group by cnum").show() //查询score中至少有5名学生选课,并且课程编号是3开头的课程的平均分 // spark.sql("select cnum,avg(degree) from score where cnum like '3%' group by cnum having count(1) >=5").show() //查询score表中选择多门课程的同学,最高分的学生 // spark.sql("select snum,degree from score where" + // " snum in (select t.snum from score t group by t.snum having count(t.snum)>1) " + // "and degree = (select max(degree) from score) ").show() //查询学生的学号为108的同年的所有学生 // spark.sql("select * from student where " + // "substring(sbirthday,0,4) = " + // "(select substring(sbirthday,0,4) from student where snum = '108')" ).show() //查询选修课程人数大于5,的课程的教师的姓名 // spark.sql("select tname from teacher e " + // "join course c on e.tnum = c.tnum " + // "join (select cnum from score group by cnum having count(1)>5) t on t.cnum = c.cnum").show() //查询成绩比平均成绩低的学生成绩 // spark.sql("select * from score s where s.degree < (select avg(degree) from score c where s.snum = c.snum)").show() //查询所有没有讲课的教师 // spark.sql("select * from teacher t where t.tnum not in " + // "(select tnum from course c where c.cnum in " + // "(select cnum from score))").show(false) //查询学生表的年龄 spark.sql("select sname , " + "(" + "cast("+getDate("yyyy")+" as int) " + "- " + "cast(substring(sbirthday,0,4) as int) " + ") as age from student").show() spark.sql("select sname ,"+getDate("yyyy")+" - substring(sbirthday,0,4) from student").show() spark.stop() } //获取当前时间 def getDate(time:String): String ={ val now:Long = System.currentTimeMillis() val df:SimpleDateFormat = new SimpleDateFormat(time) df.format(now) } }