File tree — 4 files changed, +14 -6 lines changed:
main/scala/com/databricks/labs/smolder
test/scala/com/databricks/labs/smolder
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import sbt.nio.Keys._
10
10
lazy val scala212 = " 2.12.8"
11
11
lazy val scala211 = " 2.11.12"
12
12
13
- lazy val sparkVersion = sys.env.getOrElse(" SPARK_VERSION" , " 3.0.0 " )
13
+ lazy val sparkVersion = sys.env.getOrElse(" SPARK_VERSION" , " 3.0.1 " )
14
14
15
15
def majorMinorVersion (version : String ): String = {
16
16
StringUtils .ordinalIndexOf(version, " ." , 2 ) match {
@@ -158,7 +158,7 @@ lazy val sparkClasspath = taskKey[String]("sparkClasspath")
158
158
lazy val sparkHome = taskKey[String ](" sparkHome" )
159
159
160
160
// Publish to Bintray
161
- ThisBuild / description := " An open-source toolkit for large-scale genomic analysis "
161
+ ThisBuild / description := " An open-source toolkit for large-scale EHR processing "
162
162
ThisBuild / homepage := Some (url(" https://databricks.com/solutions/industries/healthcare" ))
163
163
ThisBuild / scmInfo := Some (
164
164
ScmInfo (
Original file line number Diff line number Diff line change @@ -64,13 +64,16 @@ private[smolder] object Message {
64
64
* @return Parses the message into a Message case class.
65
65
*/
66
66
def apply (text : UTF8String ): Message = {
67
+
68
+ val delim : Byte = 0x0d
69
+
67
70
if (text == null ) {
68
71
null
69
72
} else {
70
73
val textString = text.toString
71
74
require(textString.nonEmpty, " Received empty string." )
72
75
73
- Message (textString.split(' \n ' ).toIterator)
76
+ Message (textString.split(delim.toChar ).toIterator)
74
77
}
75
78
}
76
79
}
Original file line number Diff line number Diff line change @@ -101,8 +101,10 @@ class MessageSuite extends SmolderBaseTest {
101
101
102
102
test(" parse a full message, by string" ) {
103
103
104
+ val delim : Byte = 0x0d
105
+
104
106
val file = testFile(" single_record.hl7" )
105
- val lines = Source .fromFile(file).getLines().mkString(" \n " )
107
+ val lines = Source .fromFile(file).getLines().mkString(delim.toChar.toString )
106
108
107
109
val message = Message (UTF8String .fromString(lines))
108
110
Original file line number Diff line number Diff line change @@ -30,7 +30,9 @@ class functionsSuite extends SmolderBaseTest {
30
30
.wholeTextFiles(file)
31
31
.map(p => TextFile (p._1, p._2)))
32
32
33
- val hl7Df = df.select(parse_hl7_message(df(" value" )).alias(" hl7" ))
33
+ val cleanDF = df.select(regexp_replace(df(" value" ), " \n " , " \r " ).alias(" clean" ))
34
+
35
+ val hl7Df = cleanDF.select(parse_hl7_message(cleanDF(" clean" )).alias(" hl7" ))
34
36
35
37
assert(hl7Df.count() === 1 )
36
38
assert(hl7Df.selectExpr(" explode(hl7.segments)" ).count() === 3 )
@@ -59,7 +61,8 @@ class functionsSuite extends SmolderBaseTest {
59
61
.wholeTextFiles(file)
60
62
.map(p => TextFile (p._1, p._2)))
61
63
62
- val hl7Df = df.select(parse_hl7_message(df(" value" )).alias(" hl7" ))
64
+ val cleanDF = df.select(regexp_replace(df(" value" ), " \n " , " \r " ).alias(" clean" ))
65
+ val hl7Df = cleanDF.select(parse_hl7_message(cleanDF(" clean" )).alias(" hl7" ))
63
66
64
67
val evnType = hl7Df.select(segment_field(" EVN" , 0 , col(" hl7.segments" ))
65
68
.alias(" type" ))
You can’t perform that action at this time.
0 commit comments