0. Read in the Data

You may need to run it once at the beginning of your work; after that, you’d better keep them as comments to save running time for knitr.

load("~/Dropbox/STAT992/DataSource/Dat.RData")
load("~/Dropbox/STAT992/DataSource/Et.RData")

1. Referral Network Clustering

library(data.table)
library(igraph)
## 
## Attaching package: 'igraph'
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union
setkey(Et, V1) #setkey for Et
head(Et)
##            V1         V2         V3         V4 V5
## 1: 1000026017 1598773715 38         16         18
## 2: 1003000126 1003951625 58         23          5
## 3: 1003000126 1053306746 25         13          6
## 4: 1003000126 1093886657 200        24          2
## 5: 1003000126 1093893331 107        16          6
## 6: 1003000126 1134120231 30         15          1
setkey(Dat, NPI) #setkey for Dat
head(Dat)
##           NPI NPPES Provider Gender NPPES Provider City
## 1: 1003000126                     M          CUMBERLAND
## 2: 1003000134                     M            EVANSTON
## 3: 1003000142                     M              TOLEDO
## 4: 1003000407                     M              PATTON
## 5: 1003000423                     F           CLEVELAND
## 6: 1003000480                     M              AURORA
##    NPPES Provider Zip Code NPPES Provider State NPPES Provider Country
## 1:                   21502                   MD                     US
## 2:                   60201                   IL                     US
## 3:                   43623                   OH                     US
## 4:                   16668                   PA                     US
## 5:                   44106                   OH                     US
## 6:                   80045                   CO                     US
##            Provider Type Medicare Participation Indicator Number of HCPCS
## 1:     Internal Medicine                                Y              22
## 2:             Pathology                                Y              13
## 3:        Anesthesiology                                Y              42
## 4:       Family Practice                                Y              37
## 5: Obstetrics/Gynecology                                Y              33
## 6:       General Surgery                                Y              60
##    Number of Services Number of Unique Beneficiaries
## 1:               1648                            665
## 2:               7518                           3940
## 3:                661                            144
## 4:               1683                            436
## 5:                320                             63
## 6:                252                            146
##    Total Submitted Charges Total Medicare Allowed Amount
## 1:                  395335                     150353.69
## 2:                 1211595                     285503.85
## 3:                  197224                      64712.32
## 4:                  240818                     176679.83
## 5:                   31637                      13393.05
## 6:                  338191                      67123.80
##    Total Medicare Payment Amount Drug Suppress Indicator
## 1:                     116332.66                        
## 2:                     217960.62                        
## 3:                      49752.77                        
## 4:                     138741.21                        
## 5:                      10320.43                       *
## 6:                      51955.64                        
##    Number of HCPCS Associated With Drug Services Number of Drug Services
## 1:                                             0                       0
## 2:                                             0                       0
## 3:                                             0                       0
## 4:                                             0                       0
## 5:                                            NA                      NA
## 6:                                             0                       0
##    Number of Unique Beneficiaries With Drug Services
## 1:                                                 0
## 2:                                                 0
## 3:                                                 0
## 4:                                                 0
## 5:                                                NA
## 6:                                                 0
##    Total Drug Submitted Charges Total Drug Medicare Allowed Amount
## 1:                            0                                  0
## 2:                            0                                  0
## 3:                            0                                  0
## 4:                            0                                  0
## 5:                           NA                                 NA
## 6:                            0                                  0
##    Total Drug Medicare Payment Amount Medical Suppress Indicator
## 1:                                  0                           
## 2:                                  0                           
## 3:                                  0                           
## 4:                                  0                           
## 5:                                 NA                          #
## 6:                                  0                           
##    Number of HCPCS Associated With Medical Services
## 1:                                               22
## 2:                                               13
## 3:                                               42
## 4:                                               37
## 5:                                               NA
## 6:                                               60
##    Number of Medical Services
## 1:                       1648
## 2:                       7518
## 3:                        661
## 4:                       1683
## 5:                         NA
## 6:                        252
##    Number of Unique Beneficiaries With Medical Services
## 1:                                                  665
## 2:                                                 3940
## 3:                                                  144
## 4:                                                  436
## 5:                                                   NA
## 6:                                                  146
##    Total Medical Submitted Charges Total Medical Medicare Allowed Amount
## 1:                          395335                             150353.69
## 2:                         1211595                             285503.85
## 3:                          197224                              64712.32
## 4:                          240818                             176679.83
## 5:                              NA                                    NA
## 6:                          338191                              67123.80
##    Total Medical Medicare Payment Amount Average Age of Beneficiaries
## 1:                             116332.66                           74
## 2:                             217960.62                           76
## 3:                              49752.77                           63
## 4:                             138741.21                           76
## 5:                                    NA                           56
## 6:                              51955.64                           63
##    Number of Beneficiaries Age Less 65
## 1:                                 120
## 2:                                 100
## 3:                                  76
## 4:                                  65
## 5:                                  35
## 6:                                  60
##    Number of Beneficiaries Age 65 to 74
## 1:                                  186
## 2:                                 1669
## 3:                                   36
## 4:                                   97
## 5:                                   13
## 6:                                   56
##    Number of Beneficiaries Age 75 to 84
## 1:                                  205
## 2:                                 1472
## 3:                                   20
## 4:                                  137
## 5:                                   NA
## 6:                                   NA
##    Number of Beneficiaries Age Greater 84 Number of Female Beneficiaries
## 1:                                    154                            359
## 2:                                    699                           2025
## 3:                                     12                             72
## 4:                                    137                            265
## 5:                                     NA                             63
## 6:                                     NA                             80
##    Number of Male Beneficiaries Number of Non-Hispanic White Beneficiaries
## 1:                          306                                        639
## 2:                         1915                                       3736
## 3:                           72                                        109
## 4:                          171                                         NA
## 5:                            0                                         NA
## 6:                           66                                        100
##    Number of Black or African American Beneficiaries
## 1:                                                14
## 2:                                                53
## 3:                                                NA
## 4:                                                NA
## 5:                                                NA
## 6:                                                NA
##    Number of Asian Pacific Islander Beneficiaries
## 1:                                             NA
## 2:                                             41
## 3:                                             NA
## 4:                                             NA
## 5:                                             NA
## 6:                                             NA
##    Number of Hispanic Beneficiaries
## 1:                               NA
## 2:                               71
## 3:                               NA
## 4:                               NA
## 5:                               NA
## 6:                               23
##    Number of American Indian/Alaska Native Beneficiaries
## 1:                                                     0
## 2:                                                     0
## 3:                                                    NA
## 4:                                                    NA
## 5:                                                    NA
## 6:                                                    NA
##    Number of Beneficiaries With Race Not Elsewhere Classified
## 1:                                                         NA
## 2:                                                         39
## 3:                                                         NA
## 4:                                                         NA
## 5:                                                         NA
## 6:                                                         NA
##    Number of Beneficiaries With Medicare Only Entitlement
## 1:                                                    469
## 2:                                                   3781
## 3:                                                     77
## 4:                                                    274
## 5:                                                     28
## 6:                                                     90
##    Number of Beneficiaries With Medicare & Medicaid Entitlement
## 1:                                                          196
## 2:                                                          159
## 3:                                                           67
## 4:                                                          162
## 5:                                                           35
## 6:                                                           56
##    Percent (%) of Beneficiaries Identified With Alzheimer's Disease or Dementia
## 1:                                                                           32
## 2:                                                                            8
## 3:                                                                           11
## 4:                                                                           30
## 5:                                                                           NA
## 6:                                                                            8
##    Percent (%) of Beneficiaries Identified With Asthma
## 1:                                                  13
## 2:                                                   4
## 3:                                                  13
## 4:                                                  11
## 5:                                                  NA
## 6:                                                  12
##    Percent (%) of Beneficiaries Identified With Atrial Fibrillation
## 1:                                                               26
## 2:                                                               12
## 3:                                                               NA
## 4:                                                               23
## 5:                                                               NA
## 6:                                                               10
##    Percent (%) of Beneficiaries Identified With Cancer
## 1:                                                  16
## 2:                                                  13
## 3:                                                   8
## 4:                                                  14
## 5:                                                  NA
## 6:                                                  18
##    Percent (%) of Beneficiaries Identified With Chronic Kidney Disease
## 1:                                                                  56
## 2:                                                                  14
## 3:                                                                  22
## 4:                                                                  51
## 5:                                                                  NA
## 6:                                                                  40
##    Percent (%) of Beneficiaries Identified With Chronic Obstructive Pulmonary Disease
## 1:                                                                                 41
## 2:                                                                                  8
## 3:                                                                                 23
## 4:                                                                                 44
## 5:                                                                                 NA
## 6:                                                                                 24
##    Percent (%) of Beneficiaries Identified With Depression
## 1:                                                      38
## 2:                                                      11
## 3:                                                      57
## 4:                                                      43
## 5:                                                      41
## 6:                                                      41
##    Percent (%) of Beneficiaries Identified With Diabetes
## 1:                                                    54
## 2:                                                    20
## 3:                                                    51
## 4:                                                    43
## 5:                                                    30
## 6:                                                    40
##    Percent (%) of Beneficiaries Identified With Heart Failure
## 1:                                                         50
## 2:                                                         11
## 3:                                                         19
## 4:                                                         50
## 5:                                                         NA
## 6:                                                         18
##    Percent (%) of Beneficiaries Identified With Hyperlipidemia
## 1:                                                          70
## 2:                                                          57
## 3:                                                          53
## 4:                                                          61
## 5:                                                          43
## 6:                                                          49
##    Percent (%) of Beneficiaries Identified With Hypertension
## 1:                                                        75
## 2:                                                        60
## 3:                                                        72
## 4:                                                        75
## 5:                                                        57
## 6:                                                        75
##    Percent (%) of Beneficiaries Identified With Ischemic Heart Disease
## 1:                                                                  67
## 2:                                                                  29
## 3:                                                                  40
## 4:                                                                  59
## 5:                                                                  22
## 6:                                                                  34
##    Percent (%) of Beneficiaries Identified With Osteoporosis
## 1:                                                        13
## 2:                                                         8
## 3:                                                        NA
## 4:                                                        17
## 5:                                                        NA
## 6:                                                        11
##    Percent (%) of Beneficiaries Identified With Rheumatoid Arthritis / Osteoarthritis
## 1:                                                                                 47
## 2:                                                                                 36
## 3:                                                                                 74
## 4:                                                                                 38
## 5:                                                                                 30
## 6:                                                                                 40
##    Percent (%) of Beneficiaries Identified With Schizophrenia / Other Psychotic Disorders
## 1:                                                                                     16
## 2:                                                                                      1
## 3:                                                                                     NA
## 4:                                                                                     17
## 5:                                                                                     NA
## 6:                                                                                     NA
##    Percent (%) of Beneficiaries Identified With Stroke
## 1:                                                  20
## 2:                                                   4
## 3:                                                  NA
## 4:                                                  10
## 5:                                                   0
## 6:                                                  NA
##    Average HCC Risk Score of Beneficiaries
## 1:                                  2.1082
## 2:                                  1.0151
## 3:                                  1.5662
## 4:                                  1.8967
## 5:                                  1.1882
## 6:                                  2.1587
pie(-sort(-(table(Dat$"Provider Type"))))

providertype.top15=names(-sort(-table(Dat$`Provider Type`))[1:15])## almost 75% of Dat, the rest 25% is defined as others
Dat$`Provider Type`[which(!(Dat$`Provider Type` %in% providertype.top15))]="Others"
pie(-sort(-(table(Dat$"Provider Type"))))

tmp.link = Et[unique(Dat$NPI)] #link two data sets
tmp.link = tmp.link[complete.cases(tmp.link)] #only keep the complete cases
referral.matrix = as.matrix(tmp.link)[,1:2]
referral.undirected.graph = graph.edgelist(referral.matrix,directed=F)
referral.undirected.graph = simplify(referral.undirected.graph) #simplify the graph
core.referral.graph = graph.coreness(referral.undirected.graph) #get core
hist(core.referral.graph)

sum(core.referral.graph > 20)
## [1] 140366
referral.graph.all = induced.subgraph(graph=referral.undirected.graph,vids=V(referral.undirected.graph)[core.referral.graph>20])


#plot(referral.graph.all, vertex.label=NA, main = "Overall Network Graph") #Loooooong time to draw. Maybe we only focus on Wisconsin. In the following, to save your time, I "#" all the command lines and add the output.
#clust.referral.all = clusters(referral.graph.all) #clustering
#clust.referral.all$csize  #seems all connected
#[1] 140366
#clust.eigen.referral.all = cluster_leading_eigen(referral.graph.all)
#table(clust.eigen.referral.all$membership)
#    1     2     3     4     5     6     7     8     9    10 
#91579 16945  9327  3249   488 16503   704  1321    50   200 
#This seems a good clustering. To call the membership index you need to use the following command
#clust.eigen.referral.all$membership
#V(referral.graph.all)

Now we try the same thing on Wisconsin, so at least we have a graph on the network.

wisc.referral = Dat[`NPPES Provider State`== "WI"]
tmp.link.wisc = Et[unique(wisc.referral$NPI)] #link two data sets
tmp.link.wisc = tmp.link.wisc[complete.cases(tmp.link.wisc)] #only keep the complete cases
referral.matrix.wisc = as.matrix(tmp.link.wisc)[,1:2]
referral.undirected.graph.wisc = graph.edgelist(referral.matrix.wisc,directed=F)
referral.undirected.graph.wisc = simplify(referral.undirected.graph.wisc) #simplify the graph
core.referral.graph.wisc = graph.coreness(referral.undirected.graph.wisc) #get core
hist(core.referral.graph.wisc)

sum(core.referral.graph.wisc > 20)
## [1] 1590
referral.graph.wisc = induced.subgraph(graph=referral.undirected.graph.wisc,vids=V(referral.undirected.graph.wisc)[core.referral.graph.wisc>20])
plot(referral.graph.wisc, vertex.label=NA, main = "Wisconsin Network Graph")

clust.referral.wisc = clusters(referral.graph.wisc) 
clust.referral.wisc$csize
## [1] 1590
clust.eigen.referral.wisc = cluster_leading_eigen(referral.graph.wisc)
table(clust.eigen.referral.wisc$membership)
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13 
## 233 344  46  97 183  71  94  16  94  65 129  89 129
third.cluster.wisc = induced.subgraph(graph=referral.undirected.graph.wisc,vids=(clust.eigen.referral.wisc$membership==3))
plot(third.cluster.wisc,vertex.label=NA,main = "Graph of 3rd cluster within WI")

2. plot with money and provider type,reason

library(colorspace)
Dat.type=Dat
pie(-sort(-(table(Dat$"Provider Type"))))

providertype.top15=names(-sort(-table(Dat$`Provider Type`))[1:15])## almost 75% of Dat, the rest 25% is defined as others
Dat.type$`Provider Type`[which(!(Dat$`Provider Type` %in% providertype.top15))]="Others"
pie(-sort(-(table(Dat.type$"Provider Type"))))

wisc.referral = Dat.type[`NPPES Provider State`== "WI"]
tmp.link.wisc = Et[unique(wisc.referral$NPI)] #link two data sets
tmp.link.wisc.forplot=tmp.link.wisc[which(tmp.link.wisc$V2 %in% wisc.referral$NPI)]
tmp.link.wisc.forplot = tmp.link.wisc.forplot[complete.cases(tmp.link.wisc.forplot)] #only keep the complete cases
referral.matrix.wisc.forplot = as.matrix(tmp.link.wisc.forplot)[,1:2]
referral.undirected.graph.wisc.forplot = graph.edgelist(referral.matrix.wisc.forplot,directed=F)
referral.undirected.graph.wisc.forplot = simplify(referral.undirected.graph.wisc.forplot) 
features = colnames(Dat.type)[c(3,7,12:14)]
wigraph=referral.undirected.graph.wisc.forplot


wigraph.core = graph.coreness(wigraph) #get core
sum(wigraph.core)
## [1] 64843
wigraph.corereduce = induced.subgraph(graph=wigraph,vids=V(wigraph)[wigraph.core>10])
sum(graph.coreness(wigraph.corereduce))
## [1] 43349
ids = V(wigraph.corereduce)$name
tmp =wisc.referral[ids, mult = "last"]
atbs = tmp[,features, with = F]  # Thank you google for helping to find "with" 1043240922
mean(complete.cases(atbs))
## [1] 1
atbs = as.matrix(atbs)
for(i in 1:ncol(atbs)){
wigraph.corereduce = set.vertex.attribute(wigraph.corereduce, name = colnames(atbs)[i], index=V(wigraph.corereduce),value =  atbs[,i])
}

set.seed(1)
locs = layout.lgl(wigraph.corereduce)

V(wigraph.corereduce)$size <- (as.numeric(V(wigraph.corereduce)$`Total Medicare Payment Amount`))^(1/6)

#color

plot(wigraph.corereduce, vertex.label = NA,vertex.color = rainbow_hcl(10)[(as.factor(V(wigraph.corereduce)$"Provider Type"))], layout = locs,main="Wisconsin(Core>10)")

legend('topleft',legend = levels(as.factor(V(wigraph.corereduce)$'Provider Type')), col=rainbow_hcl(10), pch=16,cex = 0.5)

3. Zipcode clusters

library(zipcode)
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following objects are masked from 'package:igraph':
## 
##     %>%, as_data_frame, groups, union
## 
## The following objects are masked from 'package:data.table':
## 
##     between, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(maps)
## 
##  # ATTENTION: maps v3.0 has an updated 'world' map.        #
##  # Many country borders and names have changed since 1990. #
##  # Type '?world' or 'news(package="maps")'. See README_v3. #
## 
## 
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:plyr':
## 
##     ozone
library(ggplot2)
library(ggsubplot)
## 
## Attaching package: 'ggsubplot'
## 
## The following object is masked from 'package:ggplot2':
## 
##     rel
## 
## The following object is masked from 'package:dplyr':
## 
##     nasa
data(zipcode)
zipcode = as.data.table(zipcode) 
setkey(zipcode, zip)


clust.referral.all = clusters(referral.graph.all) #clustering
clust.eigen.referral.all = cluster_leading_eigen(referral.graph.all)
table(clust.eigen.referral.all$membership)
## 
##     1     2     3     4     5     6     7     8     9    10 
## 91579 16945  9327  3249   488 16503   704  1321    50   200
clusterIndex = data.frame(NPI = names(V(referral.graph.all)), index = clust.eigen.referral.all$membership)

# The following code find Zipcode correspond to NPI:
zipToNPI = data.frame(zipcode = Dat$`NPPES Provider Zip Code`, NPI = Dat$NPI)
clusterIndex = merge(zipToNPI, clusterIndex, by = "NPI")

head(clusterIndex)
##          NPI zipcode index
## 1 1003000126   21502     1
## 2 1003000936   29203     1
## 3 1003001363   92243     2
## 4 1003002783   15213     1
## 5 1003002809   10011     1
## 6 1003002932   92055     1
colnames(clusterIndex)[2] = "zip"
colnames(clusterIndex)[3] = "cluster"
zipdata = na.omit(merge(clusterIndex, zipcode, by = "zip"))
usa_map <- map_data("state")
zipdata$cluster = as.factor(zipdata$cluster)
p = ggplot(data = zipdata, aes(x = longitude, y = latitude))
p + geom_polygon(data = usa_map, aes(x = long, y = lat, group = group, fill = region, alpha = .5), show_guide = FALSE) +
 geom_point(aes(colour = cluster, size = "0.0001")) + xlim(-175, -50) # +
## Warning: Removed 2 rows containing missing values (geom_point).

  # scale_color_brewer(type = "qual", palette = 3)

4. Explore payment and build regression model within each cluster

library(grDevices)
library(Matrix)
library(rARPACK)
library(plyr)
suppressPackageStartupMessages(library(dplyr))
library(ggplot2)
library(data.table)
library(igraph)

Now we foucs on Wisconsin

Try some regression for WI

##           NPI NPPES Provider Gender NPPES Provider City
## 1: 1093710741                     M           MILWAUKEE
##    NPPES Provider Zip Code NPPES Provider State NPPES Provider Country
## 1:                   53226                   WI                     US
##    Provider Type Medicare Participation Indicator Number of HCPCS
## 1:        Others                                Y              55
##    Number of Services Number of Unique Beneficiaries
## 1:              50541                           1449
##    Total Submitted Charges Total Medicare Allowed Amount
## 1:                16350958                      10469189
##    Total Medicare Payment Amount Drug Suppress Indicator
## 1:                       8189064                        
##    Number of HCPCS Associated With Drug Services Number of Drug Services
## 1:                                             6                   35005
##    Number of Unique Beneficiaries With Drug Services
## 1:                                               709
##    Total Drug Submitted Charges Total Drug Medicare Allowed Amount
## 1:                     13365977                            8848571
##    Total Drug Medicare Payment Amount Medical Suppress Indicator
## 1:                            6961852                           
##    Number of HCPCS Associated With Medical Services
## 1:                                               49
##    Number of Medical Services
## 1:                      15536
##    Number of Unique Beneficiaries With Medical Services
## 1:                                                 1449
##    Total Medical Submitted Charges Total Medical Medicare Allowed Amount
## 1:                         2984981                               1620618
##    Total Medical Medicare Payment Amount Average Age of Beneficiaries
## 1:                               1227212                           80
##    Number of Beneficiaries Age Less 65
## 1:                                  74
##    Number of Beneficiaries Age 65 to 74
## 1:                                  338
##    Number of Beneficiaries Age 75 to 84
## 1:                                  514
##    Number of Beneficiaries Age Greater 84 Number of Female Beneficiaries
## 1:                                    523                            891
##    Number of Male Beneficiaries Number of Non-Hispanic White Beneficiaries
## 1:                          558                                       1310
##    Number of Black or African American Beneficiaries
## 1:                                                82
##    Number of Asian Pacific Islander Beneficiaries
## 1:                                             16
##    Number of Hispanic Beneficiaries
## 1:                               28
##    Number of American Indian/Alaska Native Beneficiaries
## 1:                                                    NA
##    Number of Beneficiaries With Race Not Elsewhere Classified
## 1:                                                         NA
##    Number of Beneficiaries With Medicare Only Entitlement
## 1:                                                   1283
##    Number of Beneficiaries With Medicare & Medicaid Entitlement
## 1:                                                          166
##    Percent (%) of Beneficiaries Identified With Alzheimer's Disease or Dementia
## 1:                                                                           14
##    Percent (%) of Beneficiaries Identified With Asthma
## 1:                                                   7
##    Percent (%) of Beneficiaries Identified With Atrial Fibrillation
## 1:                                                               15
##    Percent (%) of Beneficiaries Identified With Cancer
## 1:                                                  12
##    Percent (%) of Beneficiaries Identified With Chronic Kidney Disease
## 1:                                                                  31
##    Percent (%) of Beneficiaries Identified With Chronic Obstructive Pulmonary Disease
## 1:                                                                                 12
##    Percent (%) of Beneficiaries Identified With Depression
## 1:                                                      17
##    Percent (%) of Beneficiaries Identified With Diabetes
## 1:                                                    40
##    Percent (%) of Beneficiaries Identified With Heart Failure
## 1:                                                         22
##    Percent (%) of Beneficiaries Identified With Hyperlipidemia
## 1:                                                          63
##    Percent (%) of Beneficiaries Identified With Hypertension
## 1:                                                        74
##    Percent (%) of Beneficiaries Identified With Ischemic Heart Disease
## 1:                                                                  43
##    Percent (%) of Beneficiaries Identified With Osteoporosis
## 1:                                                         8
##    Percent (%) of Beneficiaries Identified With Rheumatoid Arthritis / Osteoarthritis
## 1:                                                                                 35
##    Percent (%) of Beneficiaries Identified With Schizophrenia / Other Psychotic Disorders
## 1:                                                                                      3
##    Percent (%) of Beneficiaries Identified With Stroke
## 1:                                                   5
##    Average HCC Risk Score of Beneficiaries
## 1:                                  1.4611

## 
## Call:
## lm(formula = payment.wisc ~ deg.graph.wisc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -49668  -40923  -26871    6798 8139344 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    49790.56    1616.04  30.810   <2e-16 ***
## deg.graph.wisc   -23.40      57.36  -0.408    0.683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 132500 on 8607 degrees of freedom
## Multiple R-squared:  1.934e-05,  Adjusted R-squared:  -9.684e-05 
## F-statistic: 0.1665 on 1 and 8607 DF,  p-value: 0.6833

The degree distribution for the network is long tail. And log–log scale Degree distribution shows a fairly linear decay in the log-frequency as a function of log-degree. There is a tendency for vertices of higher degrees to link with similar vertices , vertices of lower degree tend to link with vertices of both lower and higher degrees. The highest payment is the type Ophthalmology. The linear relationship for degree vs. payment is not significant .

## [1] 2149
## [1] "Provider Type"                  "Number of Services"            
## [3] "Number of Unique Beneficiaries" "Total Medicare Payment Amount" 
## [5] "Average Age of Beneficiaries"
## [1] 1
## IGRAPH UN-- 2149 36321 -- 
## + attr: name (v/c), Provider Type (v/c), Number of Services (v/c),
## | Number of Unique Beneficiaries (v/c), Total Medicare Payment
## | Amount (v/c), Average Age of Beneficiaries (v/c)

use walktrap to detect community

spectral cluster

## 
##  16  21  15  20   9   1   7   8  17  12  14  22   2   4   5  19  23  10 
##   5  20  30  46  48  51  60  62  65  68  70  74  76  78  78  80  81  82 
##  11  18   3  24  13   6  25 
##  84 103 110 112 132 185 349

plot for spectral cluster

Take each cluster as a unit of observation, and obtain summary measures of the networks INSIDE each of those clusters. Try to find some relationship.

## Source: local data frame [6 x 5]
## 
##    class mean_degree mean_payment mean_ave_pay mean_ave_age
##   (fctr)       (dbl)        (dbl)        (dbl)        (dbl)
## 1      1    18.01961     74530.04     54.09793     73.76471
## 2      2    23.50000     91632.48     64.60651     73.22368
## 3      3    37.26364    116074.97     63.36514     74.84545
## 4      4    28.39744    140247.10     59.19074     72.91026
## 5      5    30.76923    101098.60     46.47988     73.34615
## 6      6    36.18378    102417.16     59.91388     72.45405

This is the summary data.frame for each cluster

regression for mean payment and mean degree in each cluster

## 
## Call:
## lm(formula = mean_payment ~ mean_degree + mean_ave_age, data = summary.frame.referral.graph.wisc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -50214 -32115 -16505  27521 139477 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -218709.8   408345.7  -0.536   0.5976  
## mean_degree     2603.7      950.1   2.740   0.0119 *
## mean_ave_age    3531.7     5642.5   0.626   0.5378  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48070 on 22 degrees of freedom
## Multiple R-squared:  0.2703, Adjusted R-squared:  0.204 
## F-statistic: 4.075 on 2 and 22 DF,  p-value: 0.03122

The positive liner relationship for mean payment and mean degree in each cluster is significant!

5. Provider Type

tmp.link = Et[unique(Dat$NPI)] #link two data sets
tmp.link = tmp.link[complete.cases(tmp.link)] #only keep the complete cases
tmp.link = tmp.link[which(tmp.link$V2 %in% Dat$`NPI`)]
referral.matrix = as.matrix(tmp.link)[,1:2]
referral.undirected.graph = graph.edgelist(referral.matrix,directed=F)
referral.undirected.graph = simplify(referral.undirected.graph) #simplify the graph
features = colnames(Dat)[7]
ids = V(referral.undirected.graph)$name
tmp = Dat[ids, mult = "last"]
atbs = tmp[,features, with = F]
mean(complete.cases(atbs))
## [1] 1
atbs = as.matrix(atbs)
for(i in 1:ncol(atbs)){
  referral.undirected.graph = set.vertex.attribute(referral.undirected.graph, name = colnames(atbs)[i],
                                                   index=V(referral.undirected.graph), value =  atbs[,i])
}

table(V(referral.undirected.graph)$`Provider Type`)
## 
##        Anesthesiology            Cardiology          Chiropractic 
##                 20035                 19859                  2513 
##                  CRNA  Diagnostic Radiology    Emergency Medicine 
##                 17837                 24219                 30823 
##       Family Practice     Internal Medicine    Nurse Practitioner 
##                 44343                 64813                 30799 
## Obstetrics/Gynecology             Optometry    Orthopedic Surgery 
##                  3170                  4743                 13822 
##                Others    Physical Therapist   Physician Assistant 
##                147450                  8076                 25068 
##            Psychiatry 
##                  7600
Type.names <- sort(unique(V(referral.undirected.graph)$`Provider Type`))
Type.nums <- V(referral.undirected.graph)$`Provider Type` %>% as.factor() %>% as.numeric()

Type.c <- contract.vertices(referral.undirected.graph, Type.nums)
Type.c <- simplify(Type.c)
Type.size <- as.vector(table(V(referral.undirected.graph)$`Provider Type`))
plot(Type.c, vertex.size=1/40*sqrt(Type.size),
     vertex.color=V(Type.c), vertex.label=NA,
     edge.arrow.size=0,
     layout=layout.kamada.kawai)

In this part, the features of provider types in the network will be studied.