본문 바로가기

Programming Language/Python

[visualization] Correlation visualization on a geoplot

반응형

Correlation between nodes on a geoplot

Environment

  • linux
  • jupyterlab

Feature

지도상에 여러 노드들의 Correlation을 시각화하는 plotting을 해야 할 필요가 있어서 한 작업물이다. Geopandas를 이용해 웹에서 가져온 json을 시각화하였고, subplot으로 각 축에 histogram을 삽입하였다. 컬러바를 이 histogram의 bar에 아예 삽입하였다.

axis를 임의의 위치에 추가하는 점, legend customize, 특정 축 없애기 등의 잔기술이 들어갔다.

#legend #customize

import os
import numpy as np
from datetime import datetime
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.dates as mdates
import matplotlib.font_manager as fm

import geopandas as gpd
# NOTE: the former ``fm.get_fontconfig_fonts()`` call was removed. Its return
# value was discarded, and the function is deprecated since Matplotlib 3.5, so
# keeping it only risks a warning or error on newer Matplotlib versions.

# Font properties built from the NanumGothicCoding TrueType file
# (presumably intended for rendering Korean labels -- confirm where it is used).
font_location = '/usr/share/fonts/truetype/nanum/NanumGothicCoding.ttf'
fprop = fm.FontProperties(fname=font_location)


# Geometries read from the GeoJSON file; drawn in grey as the map layer of
# each panel further down in this script.
df_places = gpd.read_file('./OPENAPI/Data/skorea-provinces-2018-geo.json')


# Map every station name to its plotting position [longitude, latitude].
sts = dict()
for s, lat, lon in _df[['station name', 'latitude', 'longitude']].values:
    sts[s] = [lon, lat]
# Columns taken from each per-station frame: merge key 't' and value column 's'.
tg = ['t', 's']

# Station name = text before the first '_' of each entry in temp[0][0].
# NOTE(review): assumes temp[0][0] and temp[1] are aligned one-to-one -- confirm.
_stations = [x.split('_')[0] for x in temp[0][0].values]
dfs = temp[1]

# Correlation for every unordered pair of stations with a known position.
__sts = []      # [station_a, station_b] for each accepted pair
__corrs = []    # correlation value for the pair at the same index
for i, df1 in enumerate(dfs):
    # start=i+1 keeps j an absolute index into dfs/_stations. Without it the
    # name looked up for df2 is wrong for every i > 0.
    for j, df2 in enumerate(dfs[i+1:], start=i+1):
        try:
            s1, s2 = _stations[i], _stations[j]
            if s1 not in sts or s2 not in sts:
                # No coordinates for one endpoint: the edge cannot be drawn.
                continue
            merged = pd.merge(df1[tg], df2[tg], how='outer', on='t',
                              suffixes=('_' + s1, '_' + s2))
            # Row 1 / last column of the correlation matrix. With merged
            # columns (t, s_<a>, s_<b>) this is the correlation between the two
            # stations' series.
            # NOTE(review): relies on 't' being kept as a column by corr() -- confirm.
            __ = merged.corr().values[1, -1]
        except (KeyError, IndexError, ValueError):
            # Best effort: skip pairs whose frames lack the expected columns
            # or cannot be correlated, instead of swallowing every exception.
            continue
        __sts.append([s1, s2])
        __corrs.append(__)

######################### PARAMS #########################
CONST_SIZE = 40        # marker size of the station scatter points
EXP = 1                # not referenced in the visible part of this script
cmap = plt.cm.Blues    # colormap shared by the edges and the histogram bars
nbin = 10              # number of histogram bins

fig = plt.figure(figsize=(8.5, 4), dpi=120, tight_layout=True)
spec = gridspec.GridSpec(ncols=2, nrows=1, figure=fig)
axes = []

######################### Original #########################
ax = fig.add_subplot(spec[0, 0])
axes.append(ax)
ax.set_xlabel('longitude')
ax.set_ylabel('latitude')

##### PLOT GEO
df_places.plot(ax=ax, color='grey')

##### PLOT STATION MARK
# Boolean mask selecting the rows of _df whose station name is in s_list.
msk = [x in s_list for x in _df['station name'].values]
sct_ = ax.scatter(_df.longitude[msk], _df.latitude[msk], CONST_SIZE, c='black', zorder=3)
sct_.set_edgecolor("none")

##### PLOT EDGES
# One line per station pair with positive correlation, coloured by its value.
# The drawn correlations are collected for the histogram below.
__cs = []
for (s1, s2), corr in zip(__sts, __corrs):
    if corr > 0:
        ax.plot([sts[s1][0], sts[s2][0]], [sts[s1][1], sts[s2][1]],
                c=cmap(corr), alpha=1., zorder=2, lw=1)
        __cs.append(corr)

### SET LEGEND
# Empty line used only to create a legend entry in the panel colour.
ax.plot([], [], c=cmap(.8), label='Tidal level')
ax.legend()

### SET HIST
# Inset axes placed in figure coordinates; only the bottom spine is kept and
# the y ticks are moved to the right.
ax = fig.add_axes([0.338, 0.27, 0.15, 0.3])  # [left, bottom, width, height]
ax.set_facecolor('none')
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.yaxis.tick_right()

hist, _bins = np.histogram(__cs, bins=nbin)
ct_bin = (_bins[:-1] + _bins[1:]) / 2   # bin centres

bin_size = _bins[-1] - _bins[0]
# Bars are coloured with the colormap value at their centre, so the histogram
# also acts as the colour key for the edges.
ax.bar(ct_bin, hist, bin_size / nbin * .85, alpha=.8, color=cmap(ct_bin))
# Outline through the bin counts. The bar-width value previously passed here
# as a third positional argument was read by plot() as an extra data series
# and has been removed.
ax.plot(ct_bin, hist, c='black')
ax.set_xlim([.55, 1.1])
ax.set_xlabel('Correlation hist')


######################### First IMF #########################
cmap = plt.cm.Greens   # colormap for the first-IMF panel
# Map every station name to its plotting position [longitude, latitude].
sts = dict()
for s, lat, lon in _df[['station name', 'latitude', 'longitude']].values:
    sts[s] = [lon, lat]
# Columns taken from each per-station frame: merge key 't' and value column 'eIMF_0'.
tg = ['t', 'eIMF_0']

# Station name = text before the first '_' of each entry in temp[0][0].
# NOTE(review): assumes temp[0][0] and temp[1] are aligned one-to-one -- confirm.
_stations = [x.split('_')[0] for x in temp[0][0].values]
dfs = temp[1]

# Correlation for every unordered pair of stations with a known position.
__sts = []      # [station_a, station_b] for each accepted pair
__corrs = []    # correlation value for the pair at the same index
for i, df1 in enumerate(dfs):
    # start=i+1 keeps j an absolute index into dfs/_stations. Without it the
    # name looked up for df2 is wrong for every i > 0.
    for j, df2 in enumerate(dfs[i+1:], start=i+1):
        try:
            s1, s2 = _stations[i], _stations[j]
            if s1 not in sts or s2 not in sts:
                # No coordinates for one endpoint: the edge cannot be drawn.
                continue
            merged = pd.merge(df1[tg], df2[tg], how='outer', on='t',
                              suffixes=('_' + s1, '_' + s2))
            # Row 1 / last column of the correlation matrix. With merged
            # columns (t, eIMF_0_<a>, eIMF_0_<b>) this is the correlation
            # between the two stations' series.
            # NOTE(review): relies on 't' being kept as a column by corr() -- confirm.
            __ = merged.corr().values[1, -1]
        except (KeyError, IndexError, ValueError):
            # Best effort: skip pairs whose frames lack the expected columns
            # or cannot be correlated, instead of swallowing every exception.
            continue
        __sts.append([s1, s2])
        __corrs.append(__)


######################### POSITIVE #########################
# Right-hand panel: same layout as the left one. The y label is omitted.
ax = fig.add_subplot(spec[0, 1])
axes.append(ax)
ax.set_xlabel('longitude')

##### PLOT GEO
df_places.plot(ax=ax, color='grey')

##### PLOT STATION MARK
# Boolean mask selecting the rows of _df whose station name is in s_list.
msk = [x in s_list for x in _df['station name'].values]
sct_ = ax.scatter(_df.longitude[msk], _df.latitude[msk], CONST_SIZE, c='black', zorder=3)
sct_.set_edgecolor("none")

##### PLOT EDGES
# One line per station pair with positive correlation, coloured by its value.
# The drawn correlations are collected for the histogram below.
__cs = []
for (s1, s2), corr in zip(__sts, __corrs):
    if corr > 0:
        ax.plot([sts[s1][0], sts[s2][0]], [sts[s1][1], sts[s2][1]],
                c=cmap(corr), alpha=1., zorder=2, lw=1)
        __cs.append(corr)

### SET LEGEND
# Empty line used only to create a legend entry in the panel colour.
ax.plot([], [], c=cmap(.8), label='First IMF')
ax.legend()

### SET HIST
# Inset axes placed in figure coordinates; only the bottom spine is kept and
# the y ticks are moved to the right.
ax = fig.add_axes([0.81, 0.27, 0.15, 0.3])  # [left, bottom, width, height]
ax.set_facecolor('none')
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.yaxis.tick_right()

hist, _bins = np.histogram(__cs, bins=nbin)
ct_bin = (_bins[:-1] + _bins[1:]) / 2   # bin centres

bin_size = _bins[-1] - _bins[0]
# Bars are coloured with the colormap value at their centre, so the histogram
# also acts as the colour key for the edges.
ax.bar(ct_bin, hist, bin_size / nbin * .85, alpha=.8, color=cmap(ct_bin))
# Outline through the bin counts. The bar-width value previously passed here
# as a third positional argument was read by plot() as an extra data series
# and has been removed.
ax.plot(ct_bin, hist, c='black')
ax.set_xlim([-.1, 1.1])
ax.set_xlabel('Correlation hist')


fig.show()
반응형