<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>온 세상이 데이터다</title>
    <link>https://hynnjnn.tistory.com/</link>
    <description></description>
    <language>ko</language>
    <pubDate>Mon, 1 Jun 2026 16:25:16 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>Hynnjnn</managingEditor>
    <item>
      <title>2024-01-02 데브코스 3주차 - 5 TIL</title>
      <link>https://hynnjnn.tistory.com/58</link>
      <description>&lt;h1&gt;Indian Restaurant Dataset&lt;/h1&gt;
&lt;pre&gt;&lt;code class=&quot;language-sql&quot;&gt;SELECT * FROM `dev-course`.indian_restaurants;&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;mySQL에서 여러 스키마 있을 경우 스키마.테이블명으로 접근해야 해서 불편하다. 이럴 경우 USE문을 한번 실행해준 후 사용하면 된다.&lt;/p&gt;
&lt;pre&gt;&lt;code class=&quot;language-sql&quot;&gt;USE `dev-course`;
SELECT * FROM indian_restaurants;&lt;/code&gt;&lt;/pre&gt;
&lt;br/&gt;

&lt;pre&gt;&lt;code class=&quot;language-sql&quot;&gt;USE `dev-course`;

-- restaurant name 중복 확인
SELECT count(1) as cnt, count(distinct restaurant_name) as cnt_distinct
from restaurants;

-- cnt가 1보다 큰(이름이 중복된) restaurant name 찾기
SELECT restaurant_name, count(1) as cnt
FROM restaurants
GROUP BY 1
HAVING cnt &amp;gt; 1;

-- 몇개 뽑아서 location이 다른지, location과 name이 같은 경우가 있는지 확인
SELECT *
FROM restaurants
WHERE restaurant_name in (&amp;#39;7th Heaven&amp;#39;, &amp;#39;1441 Pizzeria&amp;#39;, &amp;#39;1944 -The HOCCO Kitchen&amp;#39;)
ORDER BY restaurant_name, location;

SELECT count(1) as cnt, count(distinct restaurant_name, location, fast_food_or_not) as cnt_distinct
FROM restaurants;
-- 결론: 행을 구분할 수 있는 유니크 키가 존재하지 않는다.

-- 레스토랑 이름별로 평점 확인
SELECT restaurant_name, count(1) as cnt, avg(rating) as avg_rating, avg(average_price) as avg_price, avg(average_delivery_time) as avg_delivery_time
FROM restaurants
GROUP BY 1
ORDER BY 3 DESC;
-- cnt 1인 레스토랑이 평점이 높다?

-- 실제로 cnt 1인 레스토랑이 평점이 높은지 확인
WITH counts as (
    SELECT restaurant_name, count(1) as cnt
    FROM restaurants
    GROUP BY 1
)
SELECT CASE cnt
       WHEN 1 THEN &amp;#39;cnt-1&amp;#39;
       WHEN 2 THEN &amp;#39;cnt-2&amp;#39;
       ELSE &amp;#39;cnt-ov3&amp;#39;
       END AS cnt_group, avg(rating) as avg_rating
FROM counts INNER JOIN restaurants ON counts.restaurant_name = restaurants.restaurant_name
GROUP BY 1;
-- 그렇진 않다. 오히려 레시피 등이 정형화된 여러 지점이 있는 레스토랑이 평균 평점이 높다. 개인 레스토랑은 편차가 클 것.

WITH counts as (
    SELECT restaurant_name, count(1) as cnt
    FROM restaurants
    GROUP BY 1
)
SELECT counts.restaurant_name, cnt, rating
FROM counts INNER JOIN restaurants ON counts.restaurant_name = restaurants.restaurant_name;
-- export 해서 파이썬으로 지점 개수에 따른 히스토그램

-- Pearson 상관계수? -1 ~ 1

-- 큰 도시 작은도시 구분? -&amp;gt; location 별 레스토랑 개수로 판단
WITH cnts as (
    SELECT location, count(1) as cnt
    FROM restaurants
    GROUP BY 1
)
SELECT location, cnt
FROM cnts
ORDER BY 2 DESC
LIMIT 30;

-- percent_rank() 사용해서 cnt 개수로 랭크
WITH cnts as (
    SELECT location, count(1) as cnt
    FROM restaurants
    GROUP BY 1
)
SELECT location, percent_rank() over (ORDER BY cnt) as cnt_rank
FROM cnts
ORDER BY 2 DESC
LIMIT 30;

-- 상위 5퍼 location
WITH cnts as (
    SELECT location, count(1) as cnt
    FROM restaurants
    GROUP BY 1
),
res as (
    SELECT location, percent_rank() over (ORDER BY cnt) as cnt_rank
    FROM cnts
)
SELECT location
FROM res
WHERE cnt_rank &amp;gt; 0.95;

-- 하위 15퍼 location
WITH cnts as (
    SELECT location, count(1) as cnt
    FROM restaurants
    GROUP BY 1
),
res as (
    SELECT location, percent_rank() over (ORDER BY cnt) as cnt_rank
    FROM cnts
)
SELECT location
FROM res
WHERE cnt_rank &amp;lt; 0.15;


-- binary 값에 따른 평균 가격
SELECT IF(south_indian_or_not = 0, &amp;#39;south-0&amp;#39;, &amp;#39;south-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1

UNION

SELECT IF(north_indian_or_not = 0, &amp;#39;north-0&amp;#39;, &amp;#39;north-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1

UNION

SELECT IF(fast_food_or_not = 0, &amp;#39;fast-0&amp;#39;, &amp;#39;fast-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1

UNION

SELECT IF(street_food = 0, &amp;#39;street-0&amp;#39;, &amp;#39;street-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1

UNION

SELECT IF(north_indian_or_not = 0, &amp;#39;north-0&amp;#39;, &amp;#39;north-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1

UNION

SELECT IF(biryani_or_not = 0, &amp;#39;biryani-0&amp;#39;, &amp;#39;biryani-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1

UNION

SELECT IF(bakery_or_not = 0, &amp;#39;bakery-0&amp;#39;, &amp;#39;bakery-1&amp;#39;) as group_name, AVG(average_price) as avg_price
FROM restaurants
GROUP BY 1;
-- 차이가 있는 것을 확인


-- Rishikesh=가장 저렴한 레스토랑, Shimla=가장 비싼 레스토랑 비교
WITH base as (
    SELECT location, south_indian_or_not, north_indian_or_not, fast_food_or_not, street_food, biryani_or_not, bakery_or_not
    FROM restaurants
    WHERE location in (&amp;#39;Rishikesh&amp;#39;, &amp;#39;Shimla&amp;#39;)
)
SELECT location, count(1) as tot_cnt, SUM(south_indian_or_not) as south_cnt,
SUM(north_indian_or_not) as north_cnt, SUM(fast_food_or_not) as fast_cnt,
SUM(street_food) as street_cnt, SUM(biryani_or_not) as biryani_cnt,
SUM(bakery_or_not) as bakery_cnt
FROM base
GROUP BY 1;


-- binary값에 따른 평균 평점, 평균 배달시간도 확인
SELECT IF(south_indian_or_not = 0, &amp;#39;south-0&amp;#39;, &amp;#39;south-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1

UNION

SELECT IF(north_indian_or_not = 0, &amp;#39;north-0&amp;#39;, &amp;#39;north-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1

UNION

SELECT IF(fast_food_or_not = 0, &amp;#39;fast-0&amp;#39;, &amp;#39;fast-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1

UNION

SELECT IF(street_food = 0, &amp;#39;street-0&amp;#39;, &amp;#39;street-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1

UNION

SELECT IF(north_indian_or_not = 0, &amp;#39;north-0&amp;#39;, &amp;#39;north-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1

UNION

SELECT IF(biryani_or_not = 0, &amp;#39;biryani-0&amp;#39;, &amp;#39;biryani-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1

UNION

SELECT IF(bakery_or_not = 0, &amp;#39;bakery-0&amp;#39;, &amp;#39;bakery-1&amp;#39;) as group_name, AVG(average_price) as avg_price,
AVG(average_delivery_time) as avg_time, ROUND(AVG(rating), 3) as avg_rating
FROM restaurants
GROUP BY 1;

-- 평점 가장 낮은/높은 지역(DESC)
SELECT location, AVG(rating) as avg_rating, count(1) as cnt
FROM restaurants
GROUP BY 1
ORDER BY 2
LIMIT 10;

WITH base as (
    SELECT location, south_indian_or_not, north_indian_or_not, fast_food_or_not, street_food, biryani_or_not, bakery_or_not
    FROM restaurants
    WHERE location in (&amp;#39;Junagadh&amp;#39;, &amp;#39;Motihari&amp;#39;)
)
SELECT location, count(1) as tot_cnt, SUM(south_indian_or_not) as south_cnt,
SUM(north_indian_or_not) as north_cnt, SUM(fast_food_or_not) as fast_cnt,
SUM(street_food) as street_cnt, SUM(biryani_or_not) as biryani_cnt,
SUM(bakery_or_not) as bakery_cnt
FROM base
GROUP BY 1;&lt;/code&gt;&lt;/pre&gt;
&lt;br/&gt;

&lt;h1&gt;Global AI, ML, Data Science Salary&lt;/h1&gt;
&lt;pre&gt;&lt;code class=&quot;language-sql&quot;&gt;USE `dev-course`;

SELECT work_year, count(1) as cnt
FROM salary
GROUP BY 1;

-- 데이터가 많은 2022, 2023년만 비교
SELECT work_year, AVG(salary_in_usd) as usd_salary
FROM salary
WHERE work_year in (&amp;#39;2022&amp;#39;, &amp;#39;2023&amp;#39;)
GROUP BY 1
ORDER BY 1;

-- 연봉이 왜 올랐나?

-- 숙련도에 따른 연봉 차이
SELECT experience_level, AVG(salary_in_usd) as usd_salary
FROM salary
GROUP BY 1
ORDER BY 2;

-- 직무에 따른 연봉 차이
SELECT job_title, AVG(salary_in_usd) as usd_salary
FROM salary
GROUP BY 1
ORDER BY 2;
-- 직무명에 직급이 포함된 경우가 많음 -&amp;gt; 나중에 비슷한 직무끼리 그룹핑 ㄱㄱ

-- 국가
SELECT employee_residence, AVG(salary_in_usd) as usd_salary
FROM salary
GROUP BY 1
ORDER BY 2;

-- 원격 출근
SELECT remote_ratio, AVG(salary_in_usd) as usd_salary
FROM salary
GROUP BY 1
ORDER BY 2;

-- 회사 규모
SELECT company_size, AVG(salary_in_usd) as usd_salary
FROM salary
GROUP BY 1
ORDER BY 2;

-- 2022 -&amp;gt; 2023 숙련도별 직원 수 차이
WITH bef as (
    SELECT work_year, experience_level, count(1) as cnt_2022
    FROM salary
    WHERE work_year = &amp;#39;2022&amp;#39;
    GROUP BY 1, 2
),
aft as (
    SELECT work_year, experience_level, count(1) as cnt_2023
    FROM salary
    WHERE work_year = &amp;#39;2023&amp;#39;
    GROUP BY 1, 2
)
SELECT bef.*, aft.cnt_2023
FROM bef INNER JOIN aft ON bef.experience_level = aft.experience_level;
-- export 해서 비율로
-- 고연봉 직급의 비율이 늘어서 평균 연봉이 높아졌다.


WITH bef as (
    SELECT work_year, company_size, count(1) as cnt_2022
    FROM salary
    WHERE work_year = &amp;#39;2022&amp;#39;
    GROUP BY 1, 2
),
aft as (
    SELECT work_year, company_size, count(1) as cnt_2023
    FROM salary
    WHERE work_year = &amp;#39;2023&amp;#39;
    GROUP BY 1, 2
)
SELECT bef.*, aft.cnt_2023
FROM bef INNER JOIN aft ON bef.company_size = aft.company_size;
-- 평균 연봉이 높은 중간 규모 회사의 비율이 늘었음
-- 결론: 22년에 비해 23년의 평균 연봉이 증가한 이유는 시니어 비중이 높아지고, 중간 규모 회사의 비중이 높아졌기 때문이다.


-- 연도별 원격 근무 비중
SELECT work_year, AVG(remote_ratio) as avg_remote_ratio
FROM salary
WHERE work_year in (&amp;#39;2022&amp;#39;, &amp;#39;2023&amp;#39;)
GROUP BY 1
ORDER BY 1;

-- 미국내 미국 외 근무 비율
WITH bef as (
    SELECT work_year, IF(employee_residence = &amp;#39;US&amp;#39;, &amp;#39;In-US&amp;#39;, &amp;#39;Out-US&amp;#39;) as residence_group, count(1) as cnt_2022
    FROM salary
    WHERE work_year = &amp;#39;2022&amp;#39;
    GROUP BY 1, 2
),
aft as (
    SELECT work_year, IF(employee_residence = &amp;#39;US&amp;#39;, &amp;#39;In-US&amp;#39;, &amp;#39;Out-US&amp;#39;) as residence_group, count(1) as cnt_2023
    FROM salary
    WHERE work_year = &amp;#39;2023&amp;#39;
    GROUP BY 1, 2
)
SELECT bef.*, aft.cnt_2023
FROM bef INNER JOIN aft ON bef.residence_group = aft.residence_group;
-- 해외 근무 비율이 줄어듦


-- 직무별 평균 연봉, 원격 근무 비중
SELECT CASE
       WHEN job_title LIKE &amp;#39;%Scientist%&amp;#39; THEN &amp;#39;S&amp;#39;
       WHEN job_title LIKE &amp;#39;%Director%&amp;#39; THEN &amp;#39;D&amp;#39;
       WHEN job_title LIKE &amp;#39;%Engineer%&amp;#39; THEN &amp;#39;E&amp;#39;
       WHEN job_title LIKE &amp;#39;%Analyst%&amp;#39; THEN &amp;#39;A&amp;#39;
       WHEN job_title LIKE &amp;#39;%Architect%&amp;#39; THEN &amp;#39;AC&amp;#39;
       WHEN job_title LIKE &amp;#39;%Consultant%&amp;#39; THEN &amp;#39;C&amp;#39;
       WHEN job_title LIKE &amp;#39;%Manager%&amp;#39; THEN &amp;#39;M&amp;#39;
       WHEN job_title LIKE &amp;#39;%Specialist%&amp;#39; THEN &amp;#39;SP&amp;#39;
       WHEN job_title LIKE &amp;#39;%Practitioner%&amp;#39; THEN &amp;#39;P&amp;#39;
       ELSE &amp;#39;OTHER&amp;#39;
       END AS job_group, AVG(salary_in_usd) as avg_salary, AVG(remote_ratio) as avg_remote_ratio, count(1) as cnt
FROM salary
GROUP BY 1
ORDER BY 3;

-- 직무별 미국 내외 근무 비중
WITH base as (
SELECT CASE
       WHEN job_title LIKE &amp;#39;%Scientist%&amp;#39; THEN &amp;#39;S&amp;#39;
       WHEN job_title LIKE &amp;#39;%Director%&amp;#39; THEN &amp;#39;D&amp;#39;
       WHEN job_title LIKE &amp;#39;%Engineer%&amp;#39; THEN &amp;#39;E&amp;#39;
       WHEN job_title LIKE &amp;#39;%Analyst%&amp;#39; THEN &amp;#39;A&amp;#39;
       WHEN job_title LIKE &amp;#39;%Architect%&amp;#39; THEN &amp;#39;AC&amp;#39;
       WHEN job_title LIKE &amp;#39;%Consultant%&amp;#39; THEN &amp;#39;C&amp;#39;
       WHEN job_title LIKE &amp;#39;%Manager%&amp;#39; THEN &amp;#39;M&amp;#39;
       WHEN job_title LIKE &amp;#39;%Specialist%&amp;#39; THEN &amp;#39;SP&amp;#39;
       WHEN job_title LIKE &amp;#39;%Practitioner%&amp;#39; THEN &amp;#39;P&amp;#39;
       ELSE &amp;#39;OTHER&amp;#39;
       END AS job_group, IF(employee_residence = &amp;#39;US&amp;#39;, &amp;#39;In-US&amp;#39;, &amp;#39;Out-US&amp;#39;) as residence_group
FROM salary
),
g_1 as (
    SELECT job_group, residence_group, count(1) as group_cnt
    FROM base
    GROUP BY 1, 2
),
g_2 as (
    SELECT job_group, count(1) as tot_cnt
    FROM base
    GROUP BY 1
)
SELECT g_1.job_group, residence_group, group_cnt, group_cnt / tot_cnt as ratio
FROM g_1 INNER JOIN g_2 ON g_1.job_group = g_2.job_group;

-- 숙련도(categorical) 연봉(숫자)간의 상관계수? -&amp;gt; Point-Biserial&lt;/code&gt;&lt;/pre&gt;
&lt;br/&gt;

&lt;h1&gt;NBA Players&lt;/h1&gt;
&lt;pre&gt;&lt;code class=&quot;language-sql&quot;&gt;USE `dev-course`;

-- 시즌별 데이터 분포?
SELECT season, count(1) as cnt
FROM nba
GROUP BY 1
ORDER BY 1;

-- player_name으로 구분 가능한지
SELECT count(1) as cnt, count(distinct season, player_name) as d_cnt
FROM nba;

-- 동명이인 확인
SELECT player_name, season, count(1) as cnt
FROM nba
GROUP BY 1, 2
HAVING cnt &amp;gt; 1;


-- 중복 선수 확인
WITH dups as (
    SELECT player_name, season, count(1) as cnt
    FROM nba
    GROUP BY 1, 2
    HAVING cnt &amp;gt; 1
),
tot as (
    SELECT *
    FROM nba
)
SELECT tot.*
FROM tot INNER JOIN dups ON tot.player_name = dups.player_name and tot.season = dups.season;

-- 선수 구분하기 위해서는 세 개의 컬럼이 필요 (선수명, 시즌, 팀 or 대학)
SELECT count(1) as cnt, count(distinct player_name, season, team_abbreviation) as d_cnt, count(distinct player_name, season, college) as d_cnt_2
FROM nba;

-- 시즌별 신장, 체중, 경기 수, 득점, 리바운드, 어시스트
SELECT season, AVG(player_height) as height, AVG(player_weight) as weight, AVG(gp) as gp,
AVG(pts) as pts, AVG(reb) as reb, AVG(ast) as ast
FROM nba
GROUP BY 1;


-- 상위(드래프트 1라운드, 10순위 이내) 드래프트 선수들의 평균 신장 체중 나이
SELECT draft_year, AVG(player_height), AVG(player_weight), AVG(age)
FROM nba
WHERE draft_round = 1
AND draft_number &amp;lt;= 10
GROUP BY 1;


-- 시즌별 득점왕?
WITH max_pts as (
    SELECT season, MAX(pts) as max_pt
    FROM nba
    GROUP BY 1
)
SELECT m.season, player_name, age, player_height, player_weight, max_pt, gp
FROM max_pts m INNER JOIN nba n ON m.season = n.season AND m.max_pt = n.pts
ORDER BY 1;


-- 시즌별 각 선수 랭크
SELECT season, player_name, rank() over (PARTITION BY season ORDER BY pts DESC) as pts_rank, pts
FROM nba;


-- 시즌별 상위 10명의 득점 수 
WITH base as (
    SELECT season, player_name, rank() over (PARTITION BY season ORDER BY pts DESC) as pts_rank, pts
    FROM nba
)
SELECT season, AVG(pts) as pts
FROM base
WHERE pts_rank &amp;lt;= 10
GROUP BY 1
ORDER BY 1;&lt;/code&gt;&lt;/pre&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/58</guid>
      <comments>https://hynnjnn.tistory.com/58#entry58comment</comments>
      <pubDate>Tue, 2 Jan 2024 18:44:50 +0900</pubDate>
    </item>
    <item>
      <title>2023-12-07 데브코스 3주차 - 4 TIL</title>
      <link>https://hynnjnn.tistory.com/57</link>
      <description>&lt;h3 data-ke-size=&quot;size23&quot;&gt;효율적인 쿼리&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;테이블을 집합으로 생각하기&lt;/li&gt;
&lt;li&gt;최대한 작게 만들어 놓고 JOIN 하기&lt;/li&gt;
&lt;li&gt;*사용을 지양하고 필요한 컬럼만 뽑아쓰기&lt;/li&gt;
&lt;li&gt;%사용 지양하고 최대한 조건 좁혀서 쓰기&lt;/li&gt;
&lt;li&gt;LIMIT 걸어서 조회하기&lt;/li&gt;
&lt;li&gt;SQL 묵시적 형변환 지원한다. 하지만 묵시적 형변환 시 쿼리 비효율적&lt;br /&gt;비교할 때 자료형 확인하기&lt;/li&gt;
&lt;li&gt;WHERE 절에서 왼쪽 컬럼에 함수 적용 지양하기? (컬럼에 함수를 적용하면 인덱스를 활용하지 못할 수 있다.)&lt;br /&gt;WHERE DATE_FORMAT(date, '%Y-%m-%d') &amp;lt;= '2023-12-07' 이런 식으로 쓰지 말고 WHERE date &amp;lt;= STR_TO_DATE('2023-12-07', '%Y-%m-%d') 이렇게 써라.&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;가독성 높이기&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;서브쿼리보다는 WITH 구문이 가독성이 좋다&lt;/li&gt;
&lt;li&gt;WITH 절을 사용할 때, 각 블록 이름을 잘 지정하자.&lt;/li&gt;
&lt;li&gt;쿼리가 복잡해지면 중간중간 주석 작성하기&lt;/li&gt;
&lt;/ul&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/57</guid>
      <comments>https://hynnjnn.tistory.com/57#entry57comment</comments>
      <pubDate>Thu, 7 Dec 2023 22:39:10 +0900</pubDate>
    </item>
    <item>
      <title>2023-12-06 데브코스 3주차 - 3 TIL</title>
      <link>https://hynnjnn.tistory.com/56</link>
      <description>&lt;p&gt;VARCHAR 0~65535&lt;br&gt;TEXT 길이 최대 65535&lt;br&gt;속도도 VARCHAR이 더 빠르면 TEXT는 언제 쓰나?&lt;/p&gt;
&lt;h3&gt;배열&lt;/h3&gt;
&lt;p&gt;MySQL 8.0 이상?&lt;br&gt;[1,2,3] json 타입으로 배열 저장 JSON_ARRAY(&amp;quot;소&amp;quot;, &amp;quot;중&amp;quot;, &amp;quot;대&amp;quot;) = &amp;#39;[&amp;quot;소&amp;quot;, &amp;quot;중&amp;quot;, &amp;quot;대&amp;quot;]&amp;#39;&lt;/p&gt;
&lt;p&gt;SELECT JSON_EXTRACT(columns, &amp;#39;$&amp;#39;) -&amp;gt; 모든 원소&lt;br&gt;SELECT JSON_EXTRACT(columns, &amp;#39;$[0]&amp;#39;) -&amp;gt; 0번 인덱스&lt;/p&gt;
&lt;h3&gt;구조체&lt;/h3&gt;
&lt;p&gt;key-value json 타입으로 key-value 저장 &lt;/p&gt;
&lt;p&gt;&lt;strong&gt;복습하기&lt;/strong&gt;&lt;/p&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/56</guid>
      <comments>https://hynnjnn.tistory.com/56#entry56comment</comments>
      <pubDate>Thu, 7 Dec 2023 09:41:39 +0900</pubDate>
    </item>
    <item>
      <title>2023-12-05 데브코스 3주차 - 2 TIL</title>
      <link>https://hynnjnn.tistory.com/55</link>
      <description>&lt;h3&gt;JOIN&lt;/h3&gt;
&lt;ul&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Inner join&lt;/strong&gt; : 교집합, 정보 손실 있을 수 있음&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;SELECT clicks.*, 1 as ordered, name
FROM clicks INNER JOIN orders
    ON clicks.user_name = orders.user_name
    AND clicks.product_id = orders.product_id
    AND clicks.date = orders.date
INNER JOIN products on clicks.product_id = products.product_id&lt;/code&gt;&lt;/pre&gt;&lt;p&gt;table.* 이런 식으로도 사용 가능함.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Left join(Left outer join)&lt;/strong&gt; : 왼쪽 테이블의 모든 행을 출력.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Right join(Right outer join)&lt;/strong&gt; : 오른쪽 테이블의 모든 행을 출력&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Full outer join&lt;/strong&gt; : left join union right join. db에 과부하를 줄 수 있음&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Cross join(Cartesian product)&lt;/strong&gt; : 모든 조합. 두 테이블 유사도 분석할 때 사용할 수 있음&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;&lt;strong&gt;Self join&lt;/strong&gt; : Inner join을 사용.&lt;/p&gt;
&lt;pre&gt;&lt;code&gt;SELECT m1.*, m2.id as sub_id, m2.name as sub_name
FROM managers_v2 m1 INNER JOIN managers_v2 m2
ON m1.substitute = m2.id&lt;/code&gt;&lt;/pre&gt;&lt;p&gt;같은 테이블을 참조하는 컬럼인 대체자 id를 대체자 이름으로 표현할 때 사용&lt;/p&gt;
&lt;br/&gt;

&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;필터링 먼저 하고 join하는 것이 효율적이다.&lt;/p&gt;
&lt;p&gt;full outer join 한 후 어느 한쪽의 key값이 null인것만 고르면 교집합 뺀 결과 얻을 수 있음&lt;/p&gt;
&lt;p&gt;위에 있는 쿼리 구조 잘 봐두기&lt;/p&gt;
&lt;br/&gt;
&lt;h3&gt;UNION&lt;/h3&gt;

&lt;p&gt;union: select 결과 합칠 때 사용, 중복 제거&lt;br&gt;union all: 중복 제거 안함&lt;/p&gt;
&lt;br/&gt;
&lt;h3&gt;WITH&lt;/h3&gt;


&lt;p&gt;CTE(Common Table Expression)라고도 함&lt;br&gt;MySQL 8.0 버전 이상에서 지원&lt;br&gt;임시 결과 집합을 생성하여 복잡한 쿼리 쉽게 작성할 수 있음&lt;br&gt;SELECT 결과를 테이블처럼&lt;br&gt;가독성 좋아짐&lt;br&gt;실습 링크 : &lt;a href=&quot;https://www.programiz.com/sql/online-compiler/&quot;&gt;https://www.programiz.com/sql/online-compiler/&lt;/a&gt;&lt;/p&gt;
&lt;pre&gt;&lt;code class=&quot;language-sql&quot;&gt;WITH odr_cnt as (

SELECT c.customer_id, count(distinct order_id) as odr_cnt, sum(amount) as total_purchase
FROM Customers c INNER JOIN Orders o on c.customer_id = o.customer_id
GROUP BY 1
ORDER BY 2 DESC

),
ship_cnt as (
  SELECT c.customer_id, count(distinct shipping_id) as ship_cnt
  FROM Customers c INNER JOIN Shippings s on c.customer_id = s.customer
  WHERE status = &amp;#39;Pending&amp;#39;
  GROUP BY 1
  ORDER BY 2 DESC

)
SELECT oc.customer_id, odr_cnt, total_purchase, COALESCE(ship_cnt, 0) as shipping_cnt
FROM odr_cnt oc LEFT JOIN ship_cnt sc on oc.customer_id = sc.customer_id
&lt;/code&gt;&lt;/pre&gt;
&lt;br/&gt;

&lt;h3&gt;시간&lt;/h3&gt;
&lt;p&gt;STRING: &amp;#39;yyyy-mm-dd&amp;#39;, &amp;#39;yyyy-mm-dd HH:MM:SS&amp;#39; -&amp;gt; 시간 계산 함수 사용 불가능&lt;br&gt;DATE: yyyy-mm-dd&lt;br&gt;DATETIME: YYYY-MM-DD HH:MM:SS&lt;br&gt;TIMESTAMP: YYYY-MM-DD HH:MM:SS UTC&lt;/p&gt;
&lt;p&gt;NOW(): 쿼리 실행 시간&lt;br&gt;SYSDATE(): 자신이 호출된 시간&lt;/p&gt;
&lt;p&gt;DATE_ADD + INTERVAL: 시간 더하고 빼기&lt;/p&gt;
&lt;h3&gt;타입 변환&lt;/h3&gt;
&lt;p&gt;CAST: CAST(&amp;#39;12131&amp;#39; AS SIGNED INTEGER)&lt;br&gt;CONVERT: 사용법 비슷&lt;/p&gt;
&lt;h3&gt;조건절&lt;/h3&gt;
&lt;p&gt;IF(조건, 참일 경우, 거짓일 경우)&lt;br&gt;IFNULL(컬럼, 널일 경우 채울 값)&lt;/p&gt;
&lt;p&gt;CASE: ORDER BY, WHERE 절 안에서도 쓸 수 있음&lt;br&gt;CASE WHEN 조건 THEN 값&lt;br&gt;WHEN 조건 THEN 값&lt;br&gt;ELSE 값&lt;br&gt;END as 별칭&lt;/p&gt;
&lt;h3&gt;유용한 함수&lt;/h3&gt;
&lt;p&gt;rank(): rank() over (order by age desc)&lt;br&gt;dense_rank()&lt;br&gt;percent_rank()&lt;/p&gt;
&lt;p&gt;partition by column: 컬럼 기준 파티션&lt;br&gt;lead(컬럼, 다음 n번째) over (partition)&lt;br&gt;lag(컬럼, 이전 n번째) over (partition)&lt;br&gt;ㄴ윈도우 함수라고 한다.&lt;/p&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/55</guid>
      <comments>https://hynnjnn.tistory.com/55#entry55comment</comments>
      <pubDate>Tue, 5 Dec 2023 23:53:11 +0900</pubDate>
    </item>
    <item>
      <title>2023-12-04 데브코스 3주차 - 1 TIL</title>
      <link>https://hynnjnn.tistory.com/54</link>
      <description>&lt;h3&gt;SQL과 RDB란 무엇인가?&lt;/h3&gt;
&lt;p&gt;데이터베이스&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;p&gt;관계형 데이터베이스(RDBMS)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;표 형태(MySQL, Oracle, PostgreSQL 등)&lt;/li&gt;
&lt;li&gt;주로 정형 데이터&lt;/li&gt;
&lt;li&gt;트랜잭션 처리 -&amp;gt; 일관성과 무결성 필요한 서비스에서 사용(트랜잭션의 ACID 특성)&lt;/li&gt;
&lt;li&gt;고정된 스키마 -&amp;gt; 데이터 구조 변경 어려움 -&amp;gt; 수평 확장 어려움&lt;/li&gt;
&lt;li&gt;SQL을 통해 데이터 수집, 관리&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;비-관계형 데이터베이스(NoSQL)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Key-value Store, Column-family, Document, Graph 등의 형태(MongoDB, cassandra 등)&lt;/li&gt;
&lt;li&gt;비정형/반정형 데이터(JSON, XML 등)&lt;/li&gt;
&lt;li&gt;ACID 특성 보장하지 않는 경우도 있음, 중복&lt;/li&gt;
&lt;li&gt;유연한 스키마 -&amp;gt; 데이터 구조 변경 용이 -&amp;gt; 수평 확장 용이&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;


&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;SQL(Structured Query Language)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Table, Column, Row&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;


&lt;p&gt;&lt;a href=&quot;http://sqlfiddle.com&quot;&gt;sqlfiddle 사이트&lt;/a&gt;에서 실습하기&lt;/p&gt;
&lt;br/&gt;


&lt;p&gt;&lt;strong&gt;팁&lt;/strong&gt;&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;필요한 행과 열만 가져오는 것이 좋음&lt;/li&gt;
&lt;li&gt;WHERE 바로 옆에 1=1과 같은 무조건 참인 조건식을 추가해서 조건 수정할 때 간단하게 주석처리만 하면 되게끔 하면 좋음&lt;ul&gt;
&lt;li&gt;예시&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;pre&gt;&lt;code&gt;SELECT *
FROM PRODUCT
WHERE 1=1
-- AND category = &amp;#39;주방용품&amp;#39;
AND price &amp;gt; 1000&lt;/code&gt;&lt;/pre&gt;&lt;ul&gt;
&lt;li&gt;1 as new_col 이런 식으로 새로운 컬럼 만들어낼 수도 있음&lt;/li&gt;
&lt;li&gt;CONCAT(name, &amp;#39;의 가격은&amp;#39;, price, &amp;#39;원 입니다.&amp;#39;) 이런 식으로 자료형 상관없이 이을 수 있음&lt;/li&gt;
&lt;li&gt;COALESCE(purchase_date, &amp;#39;Unknown&amp;#39;) : 널값 채우기&lt;/li&gt;
&lt;li&gt;CREATE TABLE IF NOT EXISTS : 테이블이 없으면 생성.&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/54</guid>
      <comments>https://hynnjnn.tistory.com/54#entry54comment</comments>
      <pubDate>Mon, 4 Dec 2023 23:44:49 +0900</pubDate>
    </item>
    <item>
      <title>2023-12-01 데브코스 2주차 - 5 TIL</title>
      <link>https://hynnjnn.tistory.com/53</link>
      <description>&lt;h3&gt;결측치&lt;/h3&gt;
&lt;ul&gt;
&lt;li&gt;NA: Not Available&lt;/li&gt;
&lt;li&gt;NaN: Not a Number&lt;/li&gt;
&lt;li&gt;Null: 아무것도 존재하지 않음&lt;/li&gt;
&lt;/ul&gt;
&lt;h3&gt;결측치가 발생하는 이유&lt;/h3&gt;
&lt;ul&gt;
&lt;li&gt;실제로 값을 입력하지 않은 경우&lt;/li&gt;
&lt;li&gt;설문조사 등 입력 칸에 미응답&lt;/li&gt;
&lt;li&gt;데이터의 오류&lt;/li&gt;
&lt;/ul&gt;
&lt;h3&gt;결측치 처리의 중요성&lt;/h3&gt;
&lt;ul&gt;
&lt;li&gt;결측치는 데이터의 손실을 불러올 수 있음&lt;/li&gt;
&lt;li&gt;결측치 대체에 따라 데이터 편향이 생길 수 있음&lt;/li&gt;
&lt;li&gt;결측치 처리에 따라 분석 결과가 많이 달라질 수 있음&lt;/li&gt;
&lt;/ul&gt;
&lt;h3&gt;결측값 대체하기&lt;/h3&gt;
&lt;ol&gt;
&lt;li&gt;&lt;p&gt;평균으로 대체하기&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;결측값을 평균값으로 대체&lt;/li&gt;
&lt;li&gt;손실은 줄어들 수 있으나 불확실&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;완전 제거하기&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;결측치가 포함되면 분석 대상에서 제거&lt;/li&gt;
&lt;li&gt;편향 가능성 줄어드나 정보 손실&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;회귀 대체법&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;회귀 방정식을 통해 결측값을 예측값으로 대체&lt;/li&gt;
&lt;li&gt;쉽지않음&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ol&gt;
&lt;br/&gt;


&lt;h3&gt;시각화 개인 공부&lt;/h3&gt;
&lt;p&gt; wine data &amp;#39;quality&amp;#39; 컬럼과 다른 컬럼 상관관계 분석하기&lt;/p&gt;
&lt;p&gt; &lt;strong&gt;1. scatterplot&lt;/strong&gt;&lt;/p&gt;
&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt; import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd

 for i in use_columns:
   plt.figure(figsize=(10, 6))
   sns.scatterplot(x=i, y=&amp;#39;quality&amp;#39;, data=df)
   plt.title(&amp;#39;Relationship between &amp;#39;+i+&amp;#39; Content and Quality&amp;#39;)
   plt.xlabel(i+&amp;#39; Content&amp;#39;)
   plt.ylabel(&amp;#39;Quality&amp;#39;)
   plt.show()  &lt;/code&gt;&lt;/pre&gt;
&lt;p&gt; &lt;figure class=&quot;imageblock alignCenter&quot; width=&quot;100%&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/DwANB/btsBgL7ghQc/vk3H07sp9INzPwJyteGCVK/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/DwANB/btsBgL7ghQc/vk3H07sp9INzPwJyteGCVK/img.jpg&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/DwANB/btsBgL7ghQc/vk3H07sp9INzPwJyteGCVK/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FDwANB%2FbtsBgL7ghQc%2Fvk3H07sp9INzPwJyteGCVK%2Fimg.jpg&quot; width=&quot;100%&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p&gt;어디에 점이 많이 뭉쳐있는지 직관적이지 않아서 점의 밀도에 따라 색깔을 바꿔주는 방법을 알아보았다.&lt;/p&gt;
&lt;br/&gt;

&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt; import matplotlib.pyplot as plt
 import seaborn as sns
 import pandas as pd
 import numpy as np
 from scipy import stats

 for i in use_columns:
   values = np.vstack([df[i], df[&amp;#39;quality&amp;#39;]])
   kernel = stats.gaussian_kde(values)(values)

   plt.figure(figsize=(10, 6))
   sns.scatterplot(x=i, y=&amp;#39;quality&amp;#39;, data=df, c=kernel)
   plt.title(&amp;#39;Relationship between &amp;#39;+i+&amp;#39; Content and Quality&amp;#39;)
   plt.xlabel(i+&amp;#39; Content&amp;#39;)
   plt.ylabel(&amp;#39;Quality&amp;#39;)
   plt.show()&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; width=&quot;100%&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ne2eh/btsBeWIkPbW/drK92vtiWIDfVcyvOL92t0/img.jpg&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ne2eh/btsBeWIkPbW/drK92vtiWIDfVcyvOL92t0/img.jpg&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ne2eh/btsBeWIkPbW/drK92vtiWIDfVcyvOL92t0/img.jpg&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fne2eh%2FbtsBeWIkPbW%2FdrK92vtiWIDfVcyvOL92t0%2Fimg.jpg&quot; width=&quot;100%&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p&gt;gaussian_kde : 가우시안 커널/필터를 사용한 kde(kernel density estimation)&lt;/p&gt;
&lt;p&gt;gaussian_kde(dataset)(points) 이런 식으로 파라미터가 들어감: dataset으로 밀도를 추정한 뒤, points 위치에서의 밀도 값을 계산한다.&lt;/p&gt;</description>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/53</guid>
      <comments>https://hynnjnn.tistory.com/53#entry53comment</comments>
      <pubDate>Fri, 1 Dec 2023 15:28:02 +0900</pubDate>
    </item>
    <item>
      <title>2023-11-29 데브코스 2주차 - 3 TIL</title>
      <link>https://hynnjnn.tistory.com/52</link>
      <description>&lt;h3&gt;엑셀 데이터 소개 및 함수 사용&lt;/h3&gt;
&lt;h4&gt;엑셀 기준 데이터 종류&lt;/h4&gt;
&lt;ul&gt;
&lt;li&gt;논리 데이터&lt;/li&gt;
&lt;li&gt;수식 데이터&lt;/li&gt;
&lt;li&gt;논리 함수 (AND, OR, NOT)&lt;/li&gt;
&lt;li&gt;집계 함수 (SUM, MIN, MAX, AVERAGE)&lt;/li&gt;
&lt;/ul&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/52</guid>
      <comments>https://hynnjnn.tistory.com/52#entry52comment</comments>
      <pubDate>Wed, 29 Nov 2023 17:54:39 +0900</pubDate>
    </item>
    <item>
      <title>2023-11-27 데브코스 2주차 - 1 TIL</title>
      <link>https://hynnjnn.tistory.com/51</link>
      <description>&lt;h3&gt;데이터란 무엇인가&lt;/h3&gt;
&lt;p&gt;사전적 정의: 이론을 세우는 데 기초가 되는 사실 또는 바탕이 되는 자료&lt;br&gt;문자형, 숫자형, 날짜형 등등 &lt;/p&gt;
&lt;br/&gt;

&lt;p&gt;데이터 -&amp;gt; 정보 -&amp;gt; 지식 -&amp;gt; 지혜&lt;br&gt;ex) 영어 표준점수141점 -&amp;gt; 영어 1등급 -&amp;gt; 수준이 높다. -&amp;gt; 다른 과목을 공부해야겠다.&lt;/p&gt;
&lt;br/&gt;

&lt;h3&gt;실습 - 공공데이터 포탈 살펴보기&lt;/h3&gt;
&lt;p&gt;&lt;a href=&quot;https://www.data.go.kr/tcs/dss/selectFileDataDetailView.do?publicDataPk=15071311&quot;&gt;서울교통공사_지하철혼잡도정보&lt;/a&gt;&lt;/p&gt;
&lt;p&gt;데이터를 통해 얻은 정보&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;p&gt;평일 출근시간인 7시부터 혼잡도가 증가하기 시작해서 8시, 8시 30분 피크 도달 후 점점 감소&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;평일 퇴근시간도 5시부터 증가하기 시작해서 6시 피크 도달 후 점점 감소&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;평일과 주말, 공휴일의 혼잡도 차이가 극명하다.&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;특히 많은 수가 몰리는 역인 사당, 강남, 서울대입구, 낙성대, 성신여대 등의 특징을 살펴보면 대학교 주변이나 회사 밀집 지역, 환승역 등의 특징을 가지고 있다.&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h4&gt;평일, 주말, 공휴일 비교하기&lt;/h4&gt;
&lt;pre&gt;&lt;code class=&quot;language-python&quot;&gt;df_day = df.groupby(&amp;#39;요일구분&amp;#39;).mean()
df_day = df_day.drop([&amp;#39;연번&amp;#39;, &amp;#39;호선&amp;#39;, &amp;#39;역번호&amp;#39;], axis=1)

import matplotlib.pyplot as plt
%matplotlib inline

fig = plt.figure()

plt.plot(df_day.loc[&amp;#39;평일&amp;#39;], color=&amp;#39;green&amp;#39;, marker=&amp;#39;o&amp;#39;, linestyle=&amp;#39;solid&amp;#39;, label=&amp;#39;평일&amp;#39;)
plt.plot(df_day.loc[&amp;#39;공휴일&amp;#39;], color=&amp;#39;red&amp;#39;, marker=&amp;#39;o&amp;#39;, linestyle=&amp;#39;solid&amp;#39;, label=&amp;#39;공휴일&amp;#39;)
plt.plot(df_day.loc[&amp;#39;토요일&amp;#39;], color=&amp;#39;blue&amp;#39;, marker=&amp;#39;o&amp;#39;, linestyle=&amp;#39;solid&amp;#39;, label=&amp;#39;토요일&amp;#39;)

plt.xticks(rotation=90)

plt.show()
&lt;/code&gt;&lt;/pre&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; width=&quot;100%&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bmpMzu/btsAUmt3nPI/rH1XpXBdatFONB2NeUgIUK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bmpMzu/btsAUmt3nPI/rH1XpXBdatFONB2NeUgIUK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bmpMzu/btsAUmt3nPI/rH1XpXBdatFONB2NeUgIUK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbmpMzu%2FbtsAUmt3nPI%2FrH1XpXBdatFONB2NeUgIUK%2Fimg.png&quot; width=&quot;100%&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;br&gt;&lt;br/&gt;&lt;/p&gt;
&lt;p&gt;시간 날 때 해볼 것&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;그래프 깔끔하게 다듬기&lt;/li&gt;
&lt;li&gt;역 별로 비교해보기&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;


&lt;p&gt;실수로 이틀차까지 완료해버렸음.&lt;/p&gt;
&lt;h3&gt;도메인 지식 습득&lt;/h3&gt;
&lt;p&gt;$$ \text{프로그래밍 지식} \cap \text{수학 통계학 지식} \cap \text{도메인 지식} = \text{데이터 분석가} $$&lt;/p&gt;
&lt;p&gt;전문적인 지식 외에도 시대적인 배경에 대한 지식도 도메인 지식이 될 수 있다.&lt;/p&gt;
&lt;p&gt;ex) 타이타닉 &lt;/p&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/51</guid>
      <comments>https://hynnjnn.tistory.com/51#entry51comment</comments>
      <pubDate>Mon, 27 Nov 2023 21:24:04 +0900</pubDate>
    </item>
    <item>
      <title>2023-11-24 데이터 활용 시 고려할 점</title>
      <link>https://hynnjnn.tistory.com/50</link>
      <description>&lt;h3&gt;데이터 관련 주의해야 할 이슈들&lt;/h3&gt;
&lt;h4&gt;데이터 관련한 최근 변화&lt;/h4&gt;
&lt;ul&gt;
&lt;li&gt;데이터의 양 증가&lt;/li&gt;
&lt;li&gt;Data Decentralization, Data Democratization&lt;/li&gt;
&lt;li&gt;클라우드&lt;/li&gt;
&lt;li&gt;데이터 거버넌스&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;-&amp;gt; 새로운 형태의 데이터 관리를 필요로 함&lt;/p&gt;
&lt;br/&gt;

&lt;h4&gt;데이터 관련 이슈&lt;/h4&gt;
&lt;ul&gt;
&lt;li&gt;&lt;p&gt;무분별한 데이터 전파&lt;br&gt;한번 노출되면 악순환&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;개인정보 생성 시 태그&lt;/li&gt;
&lt;li&gt;접근 권한 제어, 로깅 -&amp;gt; 감사&lt;/li&gt;
&lt;li&gt;개인정보 추출, 삭제 자동화&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;같은 데이터, 다른 해석&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;지표 등의 정확한 정의 사전 필요 -&amp;gt; 데이터 사전&lt;/li&gt;
&lt;li&gt;각 지표 계산에서 바탕이 되는 데이터 = Source of Truth&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;


&lt;/li&gt;
&lt;/ul&gt;
&lt;ul&gt;
&lt;li&gt;&lt;p&gt;너무 많은 대시보드, 테이블 중복&lt;br&gt;Data Democratization -&amp;gt; 정보 과잉, 찾는 데 비용 증가, 데이터 중복 처리 비용 증가&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Data Catalog 도입&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;메타 데이터의 부족 (메타데이터 = 데이터의 데이터: 이 테이블에서 어떤 것이 개인정보인지 태그)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;누가 주인인가? 어떻게 생성되었는가? -&amp;gt; Data Lineage 데이터 계승도&lt;/li&gt;
&lt;li&gt;데이터 별 오너 지정&lt;/li&gt;
&lt;li&gt;데이터 별 다양한 태그, 분류 체계 적용&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;불분명한 데이터 오너십&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;위 내용과 비슷&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Data Silo&lt;br&gt;데이터 독점 현상. 한 조직이 데이터를 많이 가지고 있어서 다른 조직이 접근하기 어려움. 이것이 권력이 될 수도 있음.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Data Mesh 개념&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;

&lt;/li&gt;
&lt;/ul&gt;
&lt;h3&gt;개인정보와 보호&lt;/h3&gt;
&lt;p&gt;PII(Personally Identifiable Info)&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;보호가 필요한 중요 정보가 무엇인가?&lt;/li&gt;
&lt;li&gt;이런 정보들이 정말 우리에게 필요한 정보인가?&lt;/li&gt;
&lt;li&gt;이 정보에 대한 접근이 정말로 필요한 사람은 누구인가?&lt;/li&gt;
&lt;li&gt;이 사람들이 정말로 해당 정보가 필요할 때 접근을 하는가?&lt;/li&gt;
&lt;li&gt;이런 접근들이 모두 기록이 되는가?&lt;/li&gt;
&lt;li&gt;기록을 주기적으로 감사하는가?&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/50</guid>
      <comments>https://hynnjnn.tistory.com/50#entry50comment</comments>
      <pubDate>Fri, 24 Nov 2023 17:00:46 +0900</pubDate>
    </item>
    <item>
      <title>2023-11-23 Gen AI를 이용한 생산성 증대</title>
      <link>https://hynnjnn.tistory.com/49</link>
      <description>&lt;h3&gt;4-1 이전 강의 퀴즈 리뷰&lt;/h3&gt;
&lt;p&gt;애자일 개발 방법론에서 필요한 미팅이 아닌 것 = 스태프 미팅&lt;/p&gt;
&lt;br/&gt;

&lt;h3&gt;4-2 Gen AI 란? 1&lt;/h3&gt;
&lt;p&gt;$$ \text{LLM} \subset \text{Gen AI} \subset \text{딥러닝} \subset \text{머신러닝} \subset \text{인공지능} $$&lt;/p&gt;
&lt;br/&gt;

&lt;h4&gt;Gen AI&lt;/h4&gt;
&lt;ul&gt;
&lt;li&gt;생성형 AI&lt;/li&gt;
&lt;li&gt;학습된 컨텐츠 바탕으로 새로운 컨텐츠를 만들어 냄&lt;/li&gt;
&lt;li&gt;딥러닝의 일부&lt;/li&gt;
&lt;li&gt;LLM(Large Language Model)은 Gen AI의 일부&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;

&lt;h4&gt;딥러닝의 모델 타입&lt;/h4&gt;
&lt;ul&gt;
&lt;li&gt;Discriminative&lt;ul&gt;
&lt;li&gt;분류/예측. 레이블 있음&lt;/li&gt;
&lt;li&gt;피처들과 레이블들 간의 관계를 학습&lt;/li&gt;
&lt;li&gt;개/고양이 분류&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;

&lt;ul&gt;
&lt;li&gt;Generative&lt;ul&gt;
&lt;li&gt;비지도 학습&lt;/li&gt;
&lt;li&gt;통계적 특성을 이해 -&amp;gt; 새로운 데이터 생성&lt;/li&gt;
&lt;li&gt;개 이미지 생성&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;

&lt;p&gt;Gen AI, ML Model 동작 방식은 동일 y = f(x), but 출력이 다르다.&lt;/p&gt;
&lt;p&gt;Gen AI의 파운데이션 모델&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;p&gt;광범위한 데이터 세트에 대해 학습된 대규모 머신 러닝 모델의 한 유형&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Pre-trained&lt;/li&gt;
&lt;li&gt;Unsupervised Learning or Self Supervised Learning&lt;/li&gt;
&lt;li&gt;Transformer 모델 아키텍처를 사용&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;특별한 학습 없이 다양한 작업에 적용 가능 (GPT-3, GPT-4, BERT 등)&lt;/p&gt;
&lt;/li&gt;
&lt;li&gt;&lt;p&gt;파인 튜닝을 통해 도메인의 특성을 살려 특정 지식을 학습 시킴 (GPT -&amp;gt; ChatGPT)&lt;/p&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;br/&gt;

&lt;h3&gt;4-3 Gen AI 란? 2&lt;/h3&gt;
&lt;p&gt;Hallucination - 헛소리하는 거&lt;br&gt;멀티모달 파운데이션 - 다양한 형태의 데이터를 훈련&lt;/p&gt;
&lt;br/&gt;

&lt;h3&gt;4-4 ChatGPT 발전 살펴보기&lt;/h3&gt;
&lt;p&gt;GPT3 -&amp;gt; GPT4 -&amp;gt; GPT4 Turbo&lt;br&gt;context window가 커짐. &lt;/p&gt;
&lt;br/&gt;

&lt;p&gt;&lt;strong&gt;prompt 잘 쓰는 법&lt;/strong&gt;&lt;br&gt;역할 부여, 해야 하는 일, 구체적 Format, 구체적 목표, 제한 사항&lt;/p&gt;
&lt;br/&gt;</description>
      <category>데브코스/TIL</category>
      <author>Hynnjnn</author>
      <guid isPermaLink="true">https://hynnjnn.tistory.com/49</guid>
      <comments>https://hynnjnn.tistory.com/49#entry49comment</comments>
      <pubDate>Thu, 23 Nov 2023 18:03:39 +0900</pubDate>
    </item>
  </channel>
</rss>