You may need to run it once at the beginning of your work; after that, you’d better keep them as comments to save running time for knitr
.
load("~/Dropbox/STAT992/DataSource/Dat.RData")
load("~/Dropbox/STAT992/DataSource/Et.RData")
library(data.table)
library(igraph)
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
setkey(Et, V1) #setkey for Et
head(Et)
## V1 V2 V3 V4 V5
## 1: 1000026017 1598773715 38 16 18
## 2: 1003000126 1003951625 58 23 5
## 3: 1003000126 1053306746 25 13 6
## 4: 1003000126 1093886657 200 24 2
## 5: 1003000126 1093893331 107 16 6
## 6: 1003000126 1134120231 30 15 1
setkey(Dat, NPI) #setkey for Dat
head(Dat)
## NPI NPPES Provider Gender NPPES Provider City
## 1: 1003000126 M CUMBERLAND
## 2: 1003000134 M EVANSTON
## 3: 1003000142 M TOLEDO
## 4: 1003000407 M PATTON
## 5: 1003000423 F CLEVELAND
## 6: 1003000480 M AURORA
## NPPES Provider Zip Code NPPES Provider State NPPES Provider Country
## 1: 21502 MD US
## 2: 60201 IL US
## 3: 43623 OH US
## 4: 16668 PA US
## 5: 44106 OH US
## 6: 80045 CO US
## Provider Type Medicare Participation Indicator Number of HCPCS
## 1: Internal Medicine Y 22
## 2: Pathology Y 13
## 3: Anesthesiology Y 42
## 4: Family Practice Y 37
## 5: Obstetrics/Gynecology Y 33
## 6: General Surgery Y 60
## Number of Services Number of Unique Beneficiaries
## 1: 1648 665
## 2: 7518 3940
## 3: 661 144
## 4: 1683 436
## 5: 320 63
## 6: 252 146
## Total Submitted Charges Total Medicare Allowed Amount
## 1: 395335 150353.69
## 2: 1211595 285503.85
## 3: 197224 64712.32
## 4: 240818 176679.83
## 5: 31637 13393.05
## 6: 338191 67123.80
## Total Medicare Payment Amount Drug Suppress Indicator
## 1: 116332.66
## 2: 217960.62
## 3: 49752.77
## 4: 138741.21
## 5: 10320.43 *
## 6: 51955.64
## Number of HCPCS Associated With Drug Services Number of Drug Services
## 1: 0 0
## 2: 0 0
## 3: 0 0
## 4: 0 0
## 5: NA NA
## 6: 0 0
## Number of Unique Beneficiaries With Drug Services
## 1: 0
## 2: 0
## 3: 0
## 4: 0
## 5: NA
## 6: 0
## Total Drug Submitted Charges Total Drug Medicare Allowed Amount
## 1: 0 0
## 2: 0 0
## 3: 0 0
## 4: 0 0
## 5: NA NA
## 6: 0 0
## Total Drug Medicare Payment Amount Medical Suppress Indicator
## 1: 0
## 2: 0
## 3: 0
## 4: 0
## 5: NA #
## 6: 0
## Number of HCPCS Associated With Medical Services
## 1: 22
## 2: 13
## 3: 42
## 4: 37
## 5: NA
## 6: 60
## Number of Medical Services
## 1: 1648
## 2: 7518
## 3: 661
## 4: 1683
## 5: NA
## 6: 252
## Number of Unique Beneficiaries With Medical Services
## 1: 665
## 2: 3940
## 3: 144
## 4: 436
## 5: NA
## 6: 146
## Total Medical Submitted Charges Total Medical Medicare Allowed Amount
## 1: 395335 150353.69
## 2: 1211595 285503.85
## 3: 197224 64712.32
## 4: 240818 176679.83
## 5: NA NA
## 6: 338191 67123.80
## Total Medical Medicare Payment Amount Average Age of Beneficiaries
## 1: 116332.66 74
## 2: 217960.62 76
## 3: 49752.77 63
## 4: 138741.21 76
## 5: NA 56
## 6: 51955.64 63
## Number of Beneficiaries Age Less 65
## 1: 120
## 2: 100
## 3: 76
## 4: 65
## 5: 35
## 6: 60
## Number of Beneficiaries Age 65 to 74
## 1: 186
## 2: 1669
## 3: 36
## 4: 97
## 5: 13
## 6: 56
## Number of Beneficiaries Age 75 to 84
## 1: 205
## 2: 1472
## 3: 20
## 4: 137
## 5: NA
## 6: NA
## Number of Beneficiaries Age Greater 84 Number of Female Beneficiaries
## 1: 154 359
## 2: 699 2025
## 3: 12 72
## 4: 137 265
## 5: NA 63
## 6: NA 80
## Number of Male Beneficiaries Number of Non-Hispanic White Beneficiaries
## 1: 306 639
## 2: 1915 3736
## 3: 72 109
## 4: 171 NA
## 5: 0 NA
## 6: 66 100
## Number of Black or African American Beneficiaries
## 1: 14
## 2: 53
## 3: NA
## 4: NA
## 5: NA
## 6: NA
## Number of Asian Pacific Islander Beneficiaries
## 1: NA
## 2: 41
## 3: NA
## 4: NA
## 5: NA
## 6: NA
## Number of Hispanic Beneficiaries
## 1: NA
## 2: 71
## 3: NA
## 4: NA
## 5: NA
## 6: 23
## Number of American Indian/Alaska Native Beneficiaries
## 1: 0
## 2: 0
## 3: NA
## 4: NA
## 5: NA
## 6: NA
## Number of Beneficiaries With Race Not Elsewhere Classified
## 1: NA
## 2: 39
## 3: NA
## 4: NA
## 5: NA
## 6: NA
## Number of Beneficiaries With Medicare Only Entitlement
## 1: 469
## 2: 3781
## 3: 77
## 4: 274
## 5: 28
## 6: 90
## Number of Beneficiaries With Medicare & Medicaid Entitlement
## 1: 196
## 2: 159
## 3: 67
## 4: 162
## 5: 35
## 6: 56
## Percent (%) of Beneficiaries Identified With Alzheimer's Disease or Dementia
## 1: 32
## 2: 8
## 3: 11
## 4: 30
## 5: NA
## 6: 8
## Percent (%) of Beneficiaries Identified With Asthma
## 1: 13
## 2: 4
## 3: 13
## 4: 11
## 5: NA
## 6: 12
## Percent (%) of Beneficiaries Identified With Atrial Fibrillation
## 1: 26
## 2: 12
## 3: NA
## 4: 23
## 5: NA
## 6: 10
## Percent (%) of Beneficiaries Identified With Cancer
## 1: 16
## 2: 13
## 3: 8
## 4: 14
## 5: NA
## 6: 18
## Percent (%) of Beneficiaries Identified With Chronic Kidney Disease
## 1: 56
## 2: 14
## 3: 22
## 4: 51
## 5: NA
## 6: 40
## Percent (%) of Beneficiaries Identified With Chronic Obstructive Pulmonary Disease
## 1: 41
## 2: 8
## 3: 23
## 4: 44
## 5: NA
## 6: 24
## Percent (%) of Beneficiaries Identified With Depression
## 1: 38
## 2: 11
## 3: 57
## 4: 43
## 5: 41
## 6: 41
## Percent (%) of Beneficiaries Identified With Diabetes
## 1: 54
## 2: 20
## 3: 51
## 4: 43
## 5: 30
## 6: 40
## Percent (%) of Beneficiaries Identified With Heart Failure
## 1: 50
## 2: 11
## 3: 19
## 4: 50
## 5: NA
## 6: 18
## Percent (%) of Beneficiaries Identified With Hyperlipidemia
## 1: 70
## 2: 57
## 3: 53
## 4: 61
## 5: 43
## 6: 49
## Percent (%) of Beneficiaries Identified With Hypertension
## 1: 75
## 2: 60
## 3: 72
## 4: 75
## 5: 57
## 6: 75
## Percent (%) of Beneficiaries Identified With Ischemic Heart Disease
## 1: 67
## 2: 29
## 3: 40
## 4: 59
## 5: 22
## 6: 34
## Percent (%) of Beneficiaries Identified With Osteoporosis
## 1: 13
## 2: 8
## 3: NA
## 4: 17
## 5: NA
## 6: 11
## Percent (%) of Beneficiaries Identified With Rheumatoid Arthritis / Osteoarthritis
## 1: 47
## 2: 36
## 3: 74
## 4: 38
## 5: 30
## 6: 40
## Percent (%) of Beneficiaries Identified With Schizophrenia / Other Psychotic Disorders
## 1: 16
## 2: 1
## 3: NA
## 4: 17
## 5: NA
## 6: NA
## Percent (%) of Beneficiaries Identified With Stroke
## 1: 20
## 2: 4
## 3: NA
## 4: 10
## 5: 0
## 6: NA
## Average HCC Risk Score of Beneficiaries
## 1: 2.1082
## 2: 1.0151
## 3: 1.5662
## 4: 1.8967
## 5: 1.1882
## 6: 2.1587
pie(-sort(-(table(Dat$"Provider Type"))))
providertype.top15=names(-sort(-table(Dat$`Provider Type`))[1:15])## almost 75% of Dat, the rest 25% is defined as others
Dat$`Provider Type`[which(!(Dat$`Provider Type` %in% providertype.top15))]="Others"
pie(-sort(-(table(Dat$"Provider Type"))))
tmp.link = Et[unique(Dat$NPI)] #link two data sets
tmp.link = tmp.link[complete.cases(tmp.link)] #only keep the complete cases
referral.matrix = as.matrix(tmp.link)[,1:2]
referral.undirected.graph = graph.edgelist(referral.matrix,directed=F)
referral.undirected.graph = simplify(referral.undirected.graph) #simplify the graph
core.referral.graph = graph.coreness(referral.undirected.graph) #get core
hist(core.referral.graph)
sum(core.referral.graph > 20)
## [1] 140366
referral.graph.all = induced.subgraph(graph=referral.undirected.graph,vids=V(referral.undirected.graph)[core.referral.graph>20])
#plot(referral.graph.all, vertex.label=NA, main = "Overall Network Graph") #Loooooong time to draw. Maybe we only focus on Wisconsin. In the following, to save your time, I "#" all the command lines and add the output.
#clust.referral.all = clusters(referral.graph.all) #clustering
#clust.referral.all$csize #seems all connected
#[1] 140366
#clust.eigen.referral.all = cluster_leading_eigen(referral.graph.all)
#table(clust.eigen.referral.all$membership)
# 1 2 3 4 5 6 7 8 9 10
#91579 16945 9327 3249 488 16503 704 1321 50 200
#This seems a good clustering. To call the membership index you need to use the following command
#clust.eigen.referral.all$membership
#V(referral.graph.all)
Now we try the same thing on Wisconsin, so at least we have a graph on the network.
wisc.referral = Dat[`NPPES Provider State`== "WI"]
tmp.link.wisc = Et[unique(wisc.referral$NPI)] #link two data sets
tmp.link.wisc = tmp.link.wisc[complete.cases(tmp.link.wisc)] #only keep the complete cases
referral.matrix.wisc = as.matrix(tmp.link.wisc)[,1:2]
referral.undirected.graph.wisc = graph.edgelist(referral.matrix.wisc,directed=F)
referral.undirected.graph.wisc = simplify(referral.undirected.graph.wisc) #simplify the graph
core.referral.graph.wisc = graph.coreness(referral.undirected.graph.wisc) #get core
hist(core.referral.graph.wisc)
sum(core.referral.graph.wisc > 20)
## [1] 1590
referral.graph.wisc = induced.subgraph(graph=referral.undirected.graph.wisc,vids=V(referral.undirected.graph.wisc)[core.referral.graph.wisc>20])
plot(referral.graph.wisc, vertex.label=NA, main = "Wisconsin Network Graph")
clust.referral.wisc = clusters(referral.graph.wisc)
clust.referral.wisc$csize
## [1] 1590
clust.eigen.referral.wisc = cluster_leading_eigen(referral.graph.wisc)
table(clust.eigen.referral.wisc$membership)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 233 344 46 97 183 71 94 16 94 65 129 89 129
third.cluster.wisc = induced.subgraph(graph=referral.undirected.graph.wisc,vids=(clust.eigen.referral.wisc$membership==3))
plot(third.cluster.wisc,vertex.label=NA,main = "Graph of 3rd cluster within WI")
library(colorspace)
Dat.type=Dat
pie(-sort(-(table(Dat$"Provider Type"))))
providertype.top15=names(-sort(-table(Dat$`Provider Type`))[1:15])## almost 75% of Dat, the rest 25% is defined as others
Dat.type$`Provider Type`[which(!(Dat$`Provider Type` %in% providertype.top15))]="Others"
pie(-sort(-(table(Dat.type$"Provider Type"))))
wisc.referral = Dat.type[`NPPES Provider State`== "WI"]
tmp.link.wisc = Et[unique(wisc.referral$NPI)] #link two data sets
tmp.link.wisc.forplot=tmp.link.wisc[which(tmp.link.wisc$V2 %in% wisc.referral$NPI)]
tmp.link.wisc.forplot = tmp.link.wisc.forplot[complete.cases(tmp.link.wisc.forplot)] #only keep the complete cases
referral.matrix.wisc.forplot = as.matrix(tmp.link.wisc.forplot)[,1:2]
referral.undirected.graph.wisc.forplot = graph.edgelist(referral.matrix.wisc.forplot,directed=F)
referral.undirected.graph.wisc.forplot = simplify(referral.undirected.graph.wisc.forplot)
features = colnames(Dat.type)[c(3,7,12:14)]
wigraph=referral.undirected.graph.wisc.forplot
wigraph.core = graph.coreness(wigraph) #get core
sum(wigraph.core)
## [1] 64843
wigraph.corereduce = induced.subgraph(graph=wigraph,vids=V(wigraph)[wigraph.core>10])
sum(graph.coreness(wigraph.corereduce))
## [1] 43349
ids = V(wigraph.corereduce)$name
tmp =wisc.referral[ids, mult = "last"]
atbs = tmp[,features, with = F] # Thank you google for helping to find "with" 1043240922
mean(complete.cases(atbs))
## [1] 1
atbs = as.matrix(atbs)
for(i in 1:ncol(atbs)){
wigraph.corereduce = set.vertex.attribute(wigraph.corereduce, name = colnames(atbs)[i], index=V(wigraph.corereduce),value = atbs[,i])
}
set.seed(1)
locs = layout.lgl(wigraph.corereduce)
V(wigraph.corereduce)$size <- (as.numeric(V(wigraph.corereduce)$`Total Medicare Payment Amount`))^(1/6)
#color
plot(wigraph.corereduce, vertex.label = NA,vertex.color = rainbow_hcl(10)[(as.factor(V(wigraph.corereduce)$"Provider Type"))], layout = locs,main="Wisconsin(Core>10)")
legend('topleft',legend = levels(as.factor(V(wigraph.corereduce)$'Provider Type')), col=rainbow_hcl(10), pch=16,cex = 0.5)
library(zipcode)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:igraph':
##
## %>%, as_data_frame, groups, union
##
## The following objects are masked from 'package:data.table':
##
## between, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(maps)
##
## # ATTENTION: maps v3.0 has an updated 'world' map. #
## # Many country borders and names have changed since 1990. #
## # Type '?world' or 'news(package="maps")'. See README_v3. #
##
##
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:plyr':
##
## ozone
library(ggplot2)
library(ggsubplot)
##
## Attaching package: 'ggsubplot'
##
## The following object is masked from 'package:ggplot2':
##
## rel
##
## The following object is masked from 'package:dplyr':
##
## nasa
data(zipcode)
zipcode = as.data.table(zipcode)
setkey(zipcode, zip)
clust.referral.all = clusters(referral.graph.all) #clustering
clust.eigen.referral.all = cluster_leading_eigen(referral.graph.all)
table(clust.eigen.referral.all$membership)
##
## 1 2 3 4 5 6 7 8 9 10
## 91579 16945 9327 3249 488 16503 704 1321 50 200
clusterIndex = data.frame(NPI = names(V(referral.graph.all)), index = clust.eigen.referral.all$membership)
# The following code find Zipcode correspond to NPI:
zipToNPI = data.frame(zipcode = Dat$`NPPES Provider Zip Code`, NPI = Dat$NPI)
clusterIndex = merge(zipToNPI, clusterIndex, by = "NPI")
head(clusterIndex)
## NPI zipcode index
## 1 1003000126 21502 1
## 2 1003000936 29203 1
## 3 1003001363 92243 2
## 4 1003002783 15213 1
## 5 1003002809 10011 1
## 6 1003002932 92055 1
colnames(clusterIndex)[2] = "zip"
colnames(clusterIndex)[3] = "cluster"
zipdata = na.omit(merge(clusterIndex, zipcode, by = "zip"))
usa_map <- map_data("state")
zipdata$cluster = as.factor(zipdata$cluster)
p = ggplot(data = zipdata, aes(x = longitude, y = latitude))
p + geom_polygon(data = usa_map, aes(x = long, y = lat, group = group, fill = region, alpha = .5), show_guide = FALSE) +
geom_point(aes(colour = cluster, size = "0.0001")) + xlim(-175, -50) # +
## Warning: Removed 2 rows containing missing values (geom_point).
# scale_color_brewer(type = "qual", palette = 3)
library(grDevices)
library(Matrix)
library(rARPACK)
library(plyr)
suppressPackageStartupMessages(library(dplyr))
library(ggplot2)
library(data.table)
library(igraph)
## NPI NPPES Provider Gender NPPES Provider City
## 1: 1093710741 M MILWAUKEE
## NPPES Provider Zip Code NPPES Provider State NPPES Provider Country
## 1: 53226 WI US
## Provider Type Medicare Participation Indicator Number of HCPCS
## 1: Others Y 55
## Number of Services Number of Unique Beneficiaries
## 1: 50541 1449
## Total Submitted Charges Total Medicare Allowed Amount
## 1: 16350958 10469189
## Total Medicare Payment Amount Drug Suppress Indicator
## 1: 8189064
## Number of HCPCS Associated With Drug Services Number of Drug Services
## 1: 6 35005
## Number of Unique Beneficiaries With Drug Services
## 1: 709
## Total Drug Submitted Charges Total Drug Medicare Allowed Amount
## 1: 13365977 8848571
## Total Drug Medicare Payment Amount Medical Suppress Indicator
## 1: 6961852
## Number of HCPCS Associated With Medical Services
## 1: 49
## Number of Medical Services
## 1: 15536
## Number of Unique Beneficiaries With Medical Services
## 1: 1449
## Total Medical Submitted Charges Total Medical Medicare Allowed Amount
## 1: 2984981 1620618
## Total Medical Medicare Payment Amount Average Age of Beneficiaries
## 1: 1227212 80
## Number of Beneficiaries Age Less 65
## 1: 74
## Number of Beneficiaries Age 65 to 74
## 1: 338
## Number of Beneficiaries Age 75 to 84
## 1: 514
## Number of Beneficiaries Age Greater 84 Number of Female Beneficiaries
## 1: 523 891
## Number of Male Beneficiaries Number of Non-Hispanic White Beneficiaries
## 1: 558 1310
## Number of Black or African American Beneficiaries
## 1: 82
## Number of Asian Pacific Islander Beneficiaries
## 1: 16
## Number of Hispanic Beneficiaries
## 1: 28
## Number of American Indian/Alaska Native Beneficiaries
## 1: NA
## Number of Beneficiaries With Race Not Elsewhere Classified
## 1: NA
## Number of Beneficiaries With Medicare Only Entitlement
## 1: 1283
## Number of Beneficiaries With Medicare & Medicaid Entitlement
## 1: 166
## Percent (%) of Beneficiaries Identified With Alzheimer's Disease or Dementia
## 1: 14
## Percent (%) of Beneficiaries Identified With Asthma
## 1: 7
## Percent (%) of Beneficiaries Identified With Atrial Fibrillation
## 1: 15
## Percent (%) of Beneficiaries Identified With Cancer
## 1: 12
## Percent (%) of Beneficiaries Identified With Chronic Kidney Disease
## 1: 31
## Percent (%) of Beneficiaries Identified With Chronic Obstructive Pulmonary Disease
## 1: 12
## Percent (%) of Beneficiaries Identified With Depression
## 1: 17
## Percent (%) of Beneficiaries Identified With Diabetes
## 1: 40
## Percent (%) of Beneficiaries Identified With Heart Failure
## 1: 22
## Percent (%) of Beneficiaries Identified With Hyperlipidemia
## 1: 63
## Percent (%) of Beneficiaries Identified With Hypertension
## 1: 74
## Percent (%) of Beneficiaries Identified With Ischemic Heart Disease
## 1: 43
## Percent (%) of Beneficiaries Identified With Osteoporosis
## 1: 8
## Percent (%) of Beneficiaries Identified With Rheumatoid Arthritis / Osteoarthritis
## 1: 35
## Percent (%) of Beneficiaries Identified With Schizophrenia / Other Psychotic Disorders
## 1: 3
## Percent (%) of Beneficiaries Identified With Stroke
## 1: 5
## Average HCC Risk Score of Beneficiaries
## 1: 1.4611
##
## Call:
## lm(formula = payment.wisc ~ deg.graph.wisc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49668 -40923 -26871 6798 8139344
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49790.56 1616.04 30.810 <2e-16 ***
## deg.graph.wisc -23.40 57.36 -0.408 0.683
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 132500 on 8607 degrees of freedom
## Multiple R-squared: 1.934e-05, Adjusted R-squared: -9.684e-05
## F-statistic: 0.1665 on 1 and 8607 DF, p-value: 0.6833
The degree distribution for the network is long tail. And log–log scale Degree distribution shows a fairly linear decay in the log-frequency as a function of log-degree. There is a tendency for vertices of higher degrees to link with similar vertices , vertices of lower degree tend to link with vertices of both lower and higher degrees. The highest payment is the type Ophthalmology. The linear relationship for degree vs. payment is not significant .
## [1] 2149
## [1] "Provider Type" "Number of Services"
## [3] "Number of Unique Beneficiaries" "Total Medicare Payment Amount"
## [5] "Average Age of Beneficiaries"
## [1] 1
## IGRAPH UN-- 2149 36321 --
## + attr: name (v/c), Provider Type (v/c), Number of Services (v/c),
## | Number of Unique Beneficiaries (v/c), Total Medicare Payment
## | Amount (v/c), Average Age of Beneficiaries (v/c)
##
## 16 21 15 20 9 1 7 8 17 12 14 22 2 4 5 19 23 10
## 5 20 30 46 48 51 60 62 65 68 70 74 76 78 78 80 81 82
## 11 18 3 24 13 6 25
## 84 103 110 112 132 185 349
## Source: local data frame [6 x 5]
##
## class mean_degree mean_payment mean_ave_pay mean_ave_age
## (fctr) (dbl) (dbl) (dbl) (dbl)
## 1 1 18.01961 74530.04 54.09793 73.76471
## 2 2 23.50000 91632.48 64.60651 73.22368
## 3 3 37.26364 116074.97 63.36514 74.84545
## 4 4 28.39744 140247.10 59.19074 72.91026
## 5 5 30.76923 101098.60 46.47988 73.34615
## 6 6 36.18378 102417.16 59.91388 72.45405
This is the summary data.frame for each cluster
regression for mean payment and mean degree in each cluster
##
## Call:
## lm(formula = mean_payment ~ mean_degree + mean_ave_age, data = summary.frame.referral.graph.wisc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -50214 -32115 -16505 27521 139477
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -218709.8 408345.7 -0.536 0.5976
## mean_degree 2603.7 950.1 2.740 0.0119 *
## mean_ave_age 3531.7 5642.5 0.626 0.5378
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48070 on 22 degrees of freedom
## Multiple R-squared: 0.2703, Adjusted R-squared: 0.204
## F-statistic: 4.075 on 2 and 22 DF, p-value: 0.03122
The positive liner relationship for mean payment and mean degree in each cluster is significant!
tmp.link = Et[unique(Dat$NPI)] #link two data sets
tmp.link = tmp.link[complete.cases(tmp.link)] #only keep the complete cases
tmp.link = tmp.link[which(tmp.link$V2 %in% Dat$`NPI`)]
referral.matrix = as.matrix(tmp.link)[,1:2]
referral.undirected.graph = graph.edgelist(referral.matrix,directed=F)
referral.undirected.graph = simplify(referral.undirected.graph) #simplify the graph
features = colnames(Dat)[7]
ids = V(referral.undirected.graph)$name
tmp = Dat[ids, mult = "last"]
atbs = tmp[,features, with = F]
mean(complete.cases(atbs))
## [1] 1
atbs = as.matrix(atbs)
for(i in 1:ncol(atbs)){
referral.undirected.graph = set.vertex.attribute(referral.undirected.graph, name = colnames(atbs)[i],
index=V(referral.undirected.graph), value = atbs[,i])
}
table(V(referral.undirected.graph)$`Provider Type`)
##
## Anesthesiology Cardiology Chiropractic
## 20035 19859 2513
## CRNA Diagnostic Radiology Emergency Medicine
## 17837 24219 30823
## Family Practice Internal Medicine Nurse Practitioner
## 44343 64813 30799
## Obstetrics/Gynecology Optometry Orthopedic Surgery
## 3170 4743 13822
## Others Physical Therapist Physician Assistant
## 147450 8076 25068
## Psychiatry
## 7600
Type.names <- sort(unique(V(referral.undirected.graph)$`Provider Type`))
Type.nums <- V(referral.undirected.graph)$`Provider Type` %>% as.factor() %>% as.numeric()
Type.c <- contract.vertices(referral.undirected.graph, Type.nums)
Type.c <- simplify(Type.c)
Type.size <- as.vector(table(V(referral.undirected.graph)$`Provider Type`))
plot(Type.c, vertex.size=1/40*sqrt(Type.size),
vertex.color=V(Type.c), vertex.label=NA,
edge.arrow.size=0,
layout=layout.kamada.kawai)
In this part, the features of provider types in the network will be studied.