SQL Queries:DATA Aggregation,JOINS (LEFT,RIGHT,INNER,CROSS & FULL OUTER JOIN)

create table exams(student_id integer,subjects varchar(20),marks int);

insert into exams values(1,'chemistry',91),(1,'physics',91),(1,'maths',92)
                 ,(2,'chemistry',91),(2,'physics',91),(2,'maths',92)
                 ,(3,'chemistry',91),(3,'physics',91),(3,'maths',92)

SELECT student_id, COUNT(*) AS total_exams, COUNT(DISTINCT marks) AS distinct_marks
FROM exams
WHERE subject IN ('chemistry', 'physics')
GROUP BY student_id;

Explanation:

  • SELECT student_id: Specifies that we want to select the student_id column.

  • COUNT(*) AS total_exams: Counts the total number of rows for each student_id, representing the total number of exams taken by each student.

  • COUNT(DISTINCT marks) AS distinct_marks: Counts the number of distinct values in the marks column for each student_id, representing the distinct marks obtained by each student.

  • FROM exams: Specifies the table exams from which the data will be retrieved.

  • WHERE subject IN ('chemistry', 'physics'): Filters the rows to include only those where the subject column is either 'chemistry' or 'physics'.

  • GROUP BY student_id: Groups the result set by the student_id column, so the aggregation functions (COUNT) are applied per student.

  • COUNT(\) & COUNT(1)-DIFFERENCE*

    In SQL, COUNT(1) is a common shorthand for counting the number of rows in a result set. It counts the number of times the expression 1 occurs within the specified column or columns.

  • However, since 1 is a constant value and does not depend on the actual data in the rows, using COUNT(1) typically performs slightly better than using COUNT(*), especially in some database systems where COUNT(*) might need to retrieve the actual data from the rows.

    Here's how you would use COUNT(1) in a SQL query:

      SELECT COUNT(1)
      FROM your_table;
    

    This will return the number of rows in the your_table. Replace your_table with the name of your table.

    Alternatively, you can also use COUNT(*) which counts the number of rows returned by the query, including those with NULL values:

      SELECT COUNT(*)
      FROM your_table;
    

    BothCOUNT(1)andCOUNT(*)will give you the same result - the number of rows in the table. However, as mentioned earlier,COUNT(1)is often preferred for performance reasons in some database systems.

HAVING CLAUSE(COUNT(*) = 2)

The HAVING clause is used in conjunction with the GROUP BY clause to filter the results of a query based on aggregated values. If you want to filter the groups to only include those with a count of 2, you would write your query like this:

sqlCopy codeSELECT column1, column2, ..., COUNT(*) AS count
FROM your_table
GROUP BY column1, column2, ...
HAVING COUNT(*) = 2;

Replace your_table with the name of your table and column1, column2, etc., with the columns you're grouping by.

This query will return rows where the count of occurrences of each group matches the condition specified in the HAVING clause, which in this case is COUNT(*) = 2. This means it will return groups where there are exactly two occurrences. Adjust the GROUP BY clause to match the grouping you require.

create table exams(student_id integer,subjects varchar(20),marks int)
--day 5
--Q1.write a update statement to update city as null for order ids==
--Q2.write a query to find the orders where city as null
--Q3.write a query to get total profit ,first order date and latest order date for each category
--Q4.write a query to find sub category the avg profit more than the half of the max profit
select 
sub_category
from oders
group by sub_category
having avg(profit)>max(profit)/2
--Q5.below
--Q6.write a query to find the students who having same marks in physics and chemistry
--Q7.write a query to find the total no of product in each catogories
select 
top 5 sub_category,sum(quantity) as total_quantity
from oders
where region='west'
group by sub_category
order by total_quantity desc
--1.group by 2.having 3.select
--where>select>order by>top 5 
--like operator for string only,it is slow
--to select date 'between 1st jan 2020 and 31st dec 2020
select
top 5 sub_category,sum(quantity) as total_quantity
from oders
where region='west'
group by sub_category
having max(profit)>5
order by total_quantity desc

--Q8.write a query to find top 5 sub categories in west region by total qty sold
--Q9.write a query to find total sales for each region and ship mode combination for orders in year 2020
insert into exams values(1,'chemistry',91),(1,'physics',91),(1,'maths',92)
                 ,(2,'chemistry',91),(2,'physics',91),(2,'maths',92)
                 ,(3,'chemistry',91),(3,'physics',91),(3,'maths',92)
select * from exams;

select COUNT(DISTINCT marks) AS distinct_marks from exams
where subjects in ('maths','physics','chemistry')
('chemistry','physics','maths')

select COUNT(*) AS total_exam from exams
where subjects in ('maths','physics','chemistry')
select * from exams;

select student_id,COUNT(distinct marks) AS distinct_marks from exams
where subjects in ('maths','physics','chemistry')
GROUP BY student_id
select * from exams

select student_id,COUNT(*) AS total_marks from exams
where subjects in ('maths','physics','chemistry')
GROUP BY student_id
--Q6.write a query to find the students who having same marks in physics and chemistry
1.select student_id from exams
where subjects in ('chemistry','physics')

select student_id from exams
where subjects in ('chemistry','physics')
group by student_id
HAVING COUNT(*) = 2
--or
select student_id,count(*),count(distinct marks) from exams
where subjects in ('chemistry','physics')
group by student_id

SELECT student_id, COUNT(*) AS total_exams, COUNT(DISTINCT marks) AS distinct_marks
FROM exams
WHERE subjects IN ('chemistry', 'physics')
GROUP BY student_id;
select * from exams;
ans--> 
--here count(*) for each id is 2(phy,chem),distinct mark should be 1 (same marks)
select student_id, Count(*) AS total_exams, count(distinct marks) as distinct_marks
from exams
where subjects in ( 'chemisrty','physics')
group by student_id
having count(*)=1 and count(distinct marks)=1;
--or
select * from exams;
select *  from exams where subjects in ( 'chemisrty','physics');
select student_id,marks, count(1) as total_rows
from exams
where subjects in ( 'chemisrty','physics')
group by student_id,marks
having count(1)=2
order by student_id 

select * from exams;
select student_id, Count(1) AS total_exams
from exams
where subjects in ( 'chemisrty','physics','maths')
group by student_id
having count(*)=2 and count(distinct marks)=2;

select student_id, Count(1) AS total_exams
from exams
where subjects in ( 'chemisrty','physics')
group by student_id
having count(*) > count(distinct marks);

select student_id
from exams
group by student_id,marks





day----6
create table employee(
    emp_id int
    ,emp_name varchar(20)
    ,dep_id int
    ,salary decimal(10,2)
    ,manager_id int
    ,emp_age int
    )
insert into employee values
    (2,'Mohit',100,15000,5,48)
    ,(3,'Bikash',100,10000,4,37)
    ,(4,'Roit',100,5000,2,16)
    ,(5,'Mudit',200,12000,6,55)
    ,(6,'Agam',200,12000,2,14)
    ,(7,'Sanjay',200,9000,2,13)
    ,(8,'Ashish',200,5000,2,12)
    ,(9,'Mukesh',300,6000,6,51)
    ,(10,'Rakesh',500,7000,6,50)
insert into employee 
values(11,'Ramesh',300,8000,6,52)
insert into employee 
values(12,'Ram',400,8000,6,52)

    select * from employee
        select dep_id,count(1) as total_count from
    employee
    group by dep_id; --count the total no as dep_id as referred and taken as unique

--Q.write a query to print dep name where non of the employee having same salary?
    select * from employee
select dep_id from employee
group by dep_id
having count(distinct salary)=count(1) 
--here count(1)=no of same dep_id is equal to no of distict salary will qualify


select status,count(customer_id) as count from test_null
group by status
status        count
active        2377
inactive    2059
null        159

select status,count(customer_id) as count from test_null 
where status <> 'inactive' 
group by status

select status,count(customer_id) as count from test_null 
where null <> 'inactive'  or status is null
group by status

--whatever col in group by you can put all the things in select

--DATABASE JOINS
SELECT * INTO NAMASTESQL.DBO.RETURNS FROM RETURNS--COPY ALL DATA TO DB NAMASTE SQL
SELECT * FROM ORDERS
SELECT * FROM RETURNS
--INNER JOIN OPERATION WITH ORDER TABLE AND RETURNS TABLE
CREATE TABLE RETURNS
(
    ORDER_ID VARCHAR(20)
    ,RETURN_REASONS VARCHAR(20)
)
ORDER_ID    SALES        ORDER_ID    RETURNS_REASON
1            100                1            BAD ITEMS
2            200                3            WRONG

SELECT O.ORDER_ID,O.ORDER_DATE,R.RETURN_REASONS
FROM ORDERS O
INNER JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID
--THIS STATEMENT JOINS WHOSE ORDER ID ARE SAME,EXACT VALUE
--JOIN OPERATIONS CAN BE DONE WITH COMMON COLUMN
--IF WE ARE NOT USING ALIAS THAN THE SQL GET CONFUSED,SINCE COL IN COMMON
--ANOTHER WAY OF DIFFERENTIATE IS TABLE_NAME.COL_NAME
ORDER_ID    SALES        RETURNS_REASON
1            100                BAD ITEMS

SELECT DISTINCT O.ORDER_ID--O.ORDER_DATE,R.RETURN_REASONS
FROM ORDERS O
INNER JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID

SELECT *
FROM ORDERS O
INNER JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID
--O/P ALL TABLE
--ALL THE COL FROM ORDERS TABLE
SELECT O.*
FROM ORDERS O
INNER JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID

SELECT O.*,R.RETURNS_REASON
FROM ORDERS O
INNER JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID
--IT IS GOOD PRACTICE TO USE ALIAS FOR EACCH COLUMN WHEN WE ARE USNING MULTIPLE TABLE

INSERT INTO RETURNS
SELECT * FROM RETURNS_BACK
--LEFT JOIN--WHENEVER IT IS NOT GETTING DATA IN THE RETURENS TABLE IT WILL RETUN NULL IN EXCEL=NA
--ALL THE THINGS BEFORE 'LEFT JOIN' IS LEFT TABLE BELOW 
SELECT O.ORDER_ID,O.PRODUCT_ID,R.RETURN_REASONS,R.ORDER_ID AS RETURN_ORDER_ID
FROM ORDERS O
LEFT JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID
--ORDER OF OPERATION FROM>JOIN>WHERE
SELECT O.ORDER_ID,O.PRODUCT_ID,R.RETURN_REASONS,R.ORDER_ID AS RETURN_ORDER_ID
FROM ORDERS O
LEFT JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID
WHERE R.ORDER_ID IS NULL--OR YOU CAN USE R.RETURN_REASON IS NULL

--LOSS OF SALES 
SELECT R.RETURN_REASONS,SUM(SALES) AS TOTAL_SALES
FROM ORDERS O
LEFT JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID--RETURN THE NULL VALUE ALSO
GROUP BY R.RETURN_REASONS

SELECT R.RETURN_REASONS,SUM(SALES) AS TOTAL_SALES
FROM ORDERS O
INNER JOIN RETURNS R ON O.ORDER_ID=R.ORDER_ID
GROUP BY R.RETURN_REASONS
--JOIN OPERATOR IS ALWAYS DEFAULT TO INNER JOIN
--IT IS GOOD PRACTICE TO MENTION FULL NAME
----------------------------------------------------------
--WE HAVING ANOTHER TABLE EMPLOYEE AND DEPT TABLE
CREATE TABLE DEPT
(
    DEP_ID INT
    ,DEP_NAME VARCHAR(20)
)
INSERT INTO DEPT
VALUES(100,'ANALYTICS')
    ,(200,'IT')
    ,(300,'HR')
    ,(400,'TEXT ANALYTICS')

SELECT * FROM employee;
SELECT * FROM DEPT;
--IN CROSS JOIN--> IF NO ANY CONDITION IS GIVEN THAN EVERY RECORD WILL JOIN WITH EACH RECORD OF TABLE 2
--IF TABLE1(2 RECORD) AND TABLE2(3 RECORD) O/P=2*3=6 RECORDS
--EVERY RECORD OF ONE TABLE WILL JOIN WITH EVERY RECCORD OF ANOTHER TABLE
SELECT * 
FROM 
employee,DEPT;

SELECT * 
FROM 
employee,DEPT
ORDER BY employee.emp_id;
--INNER JOIN AS CROSS JOIN
SELECT * 
FROM 
employee
INNER JOIN DEPT ON 1=1--CLAUSE JOIN-ANY TRUE CONDITIONS DEPT RESULT IT WILL JOIN EACH RECORD OF BOTH TABLE;
ORDER BY employee.emp_id


SELECT * FROM employee;
SELECT * FROM DEPT;
SELECT * 
FROM 
employee E
INNER JOIN DEPT D ON E.dep_id=D.DEP_ID
ORDER BY E.emp_id;
--BEFORE LEFT THE TABLE IS LEFT TABLE
SELECT * 
FROM 
employee E
left JOIN DEPT D ON E.dep_id=D.DEP_ID
ORDER BY E.emp_id;
------------------RIGHT JOIN---------------
--RIGHT JOIN -REVERSE OF LEFT JOIN ,WE ARE GETTING EVERY THING IN THE RIGHT TABLE
--NULL VALU FROM LEFT TABLE WHOSE VALUE IS EMPTY
SELECT * 
FROM 
DEPT,employee;
--BELOW BOTH STATEMENT ARE SAME --ACT AS RIGHT JOIN
--FIRST TABLE IS DRIVER TABLE,2ND TABLE -CONDUCTOR TABLE
--AFTER RIGHT IT WILL BE RIGHT TABLE
SELECT * 
FROM 
employee E
RIGHT JOIN DEPT D ON E.dep_id=D.DEP_ID
ORDER BY E.emp_id;
--SWAPPING THE TABLE
SELECT * 
FROM 
DEPT D
LEFT JOIN  employee E ON E.dep_id=D.DEP_ID
ORDER BY E.emp_id;
------FULL OUTER JOIN-------------------
--COMMMON WILL COME,EXTRA IN LEFT WILL COME,EXTRA IN RIGHT WILL COME

SELECT * 
FROM 
DEPT D
FULL OUTER JOIN  employee E ON E.dep_id=D.DEP_ID
ORDER BY E.emp_id;