[R] extract all numbers from a string

Nick Matzke
Mon Jun 17 03:00:49 CEST 2013

Thanks *VERY* much, this is great!

I realized there are a few more cases; I think I've now got something
that covers all the possibilities:

library(stringr)

# Sample text with integers, floats, scientific notation and a negative.
tmpstr <- "The first number is: 32.  Another one is: 32.1.
Here's a number in scientific format, 0.3523e10, and
another, 0.3523e-10, and a negative, -313.1"

# One alternative per supported number format.
# Order matters: regex engines that take the FIRST matching alternative
# (rather than the longest one) would otherwise cut "32.1" into "32" and "1".
# The most specific forms are therefore listed first.
patternslist <- c(
  "(-\\d+\\.\\d+e-\\d+)", # negative float, scientific w. negative power
  "(-\\d+\\.\\d+e\\d+)",  # negative float, scientific w. positive power
  "(\\d+\\.\\d+e-\\d+)",  # positive float, scientific w. negative power
  "(\\d+\\.\\d+e\\d+)",   # positive float, scientific w. positive power
  "(-\\d+e-\\d+)",        # negative int, scientific w. negative power
  "(-\\d+e\\d+)",         # negative int, scientific w. positive power
  "(\\d+e-\\d+)",         # positive int, scientific w. negative power
  "(\\d+e\\d+)",          # positive int, scientific w. positive power
  "(-\\d+\\.\\d+)",       # negative float
  "(\\d+\\.\\d+)",        # positive float
  "(-\\d+)",              # negative integer
  "(\\d+)"                # positive integer
)

# Join the alternatives into a single regular expression and show it.
pattern <- paste(patternslist, collapse = "|")
pattern

# Extract every match from the string and convert to numeric.
as.numeric(str_extract_all(tmpstr, pattern)[[1]])

# A more complex string
tmpstr <- "The first number is: 32.  342 342.1   -3234e-10
3234e-1 Another one is: 32.1. Here's a number in scientific
format, 0.3523e10, and another, 0.3523e-10, and a negative,
-313.1"
# Commented-out pattern literal, kept for reference only; the `pattern`
# variable already in scope is what the call below uses.
# pattern = "(\\d)+|(-\\d)+|(\\d+\\.\\d+)|(-\\d+\\.\\d+)|(\\d+.\\d+e\\d+)|(\\d+\\.\\d+e-\\d+)|(-\\d+.\\d+e\\d+)|(-\\d+\\.\\d+e-\\d+)"
as.numeric(str_extract_all(tmpstr, pattern)[[1]])

Cheers!
Nick

PS: A function version:

#' Extract all numbers from a text string
#'
#' Finds every integer, decimal and scientific-notation number in the first
#' element of `tmpstr` and converts the matches to numeric. Recognised forms:
#' an optional leading minus, digits, an optional `.digits` fraction, and an
#' optional lower-case `e` exponent that may itself carry a minus sign
#' (e.g. `32`, `-313.1`, `0.3523e10`, `-3234e-10`).
#'
#' @param tmpstr Character vector; only its first element is searched.
#' @return Numeric vector of the numbers found, in order of appearance, or
#'   `numeric(0)` when the string contains none.
#' @examples
#' getnums("The first number is: 32.  342 342.1   -3234e-10 3234e-1")
getnums <- function(tmpstr) {
  # One pattern with optional parts instead of a twelve-way alternation:
  # the result no longer depends on whether the regex engine picks the first
  # or the longest matching alternative ("32.1" must not become 32 and 1).
  pattern <- "-?\\d+(?:\\.\\d+)?(?:e-?\\d+)?"

  # Base R matching, so calling the function attaches no package.
  # perl = TRUE is needed for the non-capturing (?: ) groups.
  hits <- regmatches(tmpstr, gregexpr(pattern, tmpstr, perl = TRUE))[[1]]

  as.numeric(hits)
}

