Text Mining of Permanent Downhole Gauges
Vignette Author
2018-02-18
library(petro.One)
library(tm)
# Build the search URL for the query "Permanent Downhole Gauge".
# NOTE(review): how = "all" presumably requires every query word to match;
# confirm against the petro.One documentation.
my_url <- make_search_url(
  query = "Permanent Downhole Gauge",
  how = "all"
)

# Total number of papers returned by the search.
get_papers_count(my_url)
## [1] 552
# Break the search results down by paper type (one row per type with its
# count, as shown in the output below).
papers_by_type(my_url)
## # A tibble: 3 x 2
## name value
## <chr> <dbl>
## 1 Conference paper 471
## 2 Journal paper 77.0
## 3 Presentation 4.00
# Read the papers found by the search into a single data frame, then print
# it to inspect the columns.
df <- read_multidoc(my_url)
df
## # A tibble: 552 x 6
## title_data paper_id source type year author1_data
## <chr> <chr> <chr> <chr> <int> <chr>
## 1 Wavelet Filtering o~ " ~ " ~ " ~ 2009 Pico, Carlos,
## 2 Encouraging Experie~ " ~ " ~ " ~ 2009 Igbokoyi, A.O., ~
## 3 Permanent Downhole ~ " ~ " ~ " ~ 2009 Horng, Chen Jiun~
## 4 Pressure Transient ~ " ~ " ~ " ~ 2013 Al-hashim, Hasan~
## 5 Reservoir Managemen~ " ~ " ~ " ~ 2004 de Oliveira Silv~
## 6 Comparative Analysi~ " ~ " ~ " ~ 2014 Enyekwe, A.E., U~
## 7 Analyzing Transient~ " ~ " ~ " ~ 2007 Zheng, Shiyi, He~
## 8 Permanent Downhole ~ " ~ " ~ " ~ 2014 Pham, Hoanh Van,~
## 9 Wireless Retrofit S~ " ~ " ~ " ~ 2014 Green, Annabel, ~
## 10 Permanent Downhole ~ " ~ " ~ " ~ 1992 Bezerra, M.F.C.,~
## # ... with 542 more rows
# petro.One is already attached at the top of the vignette, so it is not
# re-attached here.
# Build the table of terms and their frequencies from the papers data frame,
# then print it.
term_freq <- term_frequency(df)
term_freq
## # A tibble: 1,514 x 2
## word freq
## <chr> <int>
## 1 reservoir 127
## 2 well 118
## 3 data 99
## 4 field 85
## 5 pressure 85
## 6 production 84
## 7 downhole 83
## 8 permanent 70
## 9 gas 69
## 10 analysis 62
## # ... with 1,504 more rows
# petro.One is already attached at the top of the vignette, so it is not
# re-attached here.
# Draw a word cloud from the papers data frame.
# NOTE(review): max.words / min.freq presumably cap the number of words shown
# and set the minimum term frequency; confirm in the petro.One documentation.
plot_wordcloud(df, max.words = 100, min.freq = 15)

Bar plot
# Bar plot built from the papers data frame.
# NOTE(review): min.freq = 25 presumably keeps only terms that occur at least
# 25 times; confirm in the petro.One documentation.
plot_bars(df, min.freq = 25)

Dendrogram
# Plot the relationships between terms in the papers data frame.
# NOTE(review): min.freq presumably filters out infrequent terms and threshold
# presumably sets the minimum association strength to draw; confirm in the
# petro.One documentation.
plot_relationships(df, min.freq = 25, threshold = 0.1)

# Hierarchical clustering of the terms in the papers data frame.
library(cluster)

# get_term_document_matrix() returns a list; keep only its `tdm` element.
tdm <- get_term_document_matrix(df)$tdm

# Drop the sparsest terms so that only a small set of terms is clustered.
tdm.rst <- removeSparseTerms(tdm, sparse = 0.93)

# Pairwise Euclidean distances between the rows (terms) of the reduced matrix.
# "euclidean" is the documented spelling of the method name; the previous
# "euclidian" was only accepted through a compatibility shim in stats::dist().
d <- dist(tdm.rst, method = "euclidean")

# Complete-linkage clustering.
# For a different look try substituting: method = "ward.D"
fit <- hclust(d = d, method = "complete")
fit
##
## Call:
## hclust(d = d, method = "complete")
##
## Cluster method : complete
## Distance : euclidean
## Number of objects: 16
