Skip to content

Commit 7e5a29a

Browse files
authored
Merge pull request #5 from databrickslabs/release
Minor adjustments to test suite & build
2 parents: 38c1a38 + cfbbccd; commit 7e5a29a

File tree

4 files changed

14 additions (+14) and 6 deletions (-6) across the changed lines

4 files changed

14 additions (+14) and 6 deletions (-6) across the changed lines

build.sbt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import sbt.nio.Keys._
1010
lazy val scala212 = "2.12.8"
1111
lazy val scala211 = "2.11.12"
1212

13-
lazy val sparkVersion = sys.env.getOrElse("SPARK_VERSION", "3.0.0")
13+
lazy val sparkVersion = sys.env.getOrElse("SPARK_VERSION", "3.0.1")
1414

1515
def majorMinorVersion(version: String): String = {
1616
StringUtils.ordinalIndexOf(version, ".", 2) match {
@@ -158,7 +158,7 @@ lazy val sparkClasspath = taskKey[String]("sparkClasspath")
158158
lazy val sparkHome = taskKey[String]("sparkHome")
159159

160160
// Publish to Bintray
161-
ThisBuild / description := "An open-source toolkit for large-scale genomic analysis"
161+
ThisBuild / description := "An open-source toolkit for large-scale EHR processing"
162162
ThisBuild / homepage := Some(url("https://databricks.com/solutions/industries/healthcare"))
163163
ThisBuild / scmInfo := Some(
164164
ScmInfo(

src/main/scala/com/databricks/labs/smolder/Message.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,16 @@ private[smolder] object Message {
6464
* @return Parses the message into a Message case class.
6565
*/
6666
def apply(text: UTF8String): Message = {
67+
68+
val delim: Byte = 0x0d
69+
6770
if (text == null) {
6871
null
6972
} else {
7073
val textString = text.toString
7174
require(textString.nonEmpty, "Received empty string.")
7275

73-
Message(textString.split('\n').toIterator)
76+
Message(textString.split(delim.toChar).toIterator)
7477
}
7578
}
7679
}

src/test/scala/com/databricks/labs/smolder/MessageSuite.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,10 @@ class MessageSuite extends SmolderBaseTest {
101101

102102
test("parse a full message, by string") {
103103

104+
val delim: Byte = 0x0d
105+
104106
val file = testFile("single_record.hl7")
105-
val lines = Source.fromFile(file).getLines().mkString("\n")
107+
val lines = Source.fromFile(file).getLines().mkString(delim.toChar.toString)
106108

107109
val message = Message(UTF8String.fromString(lines))
108110

src/test/scala/com/databricks/labs/smolder/functionsSuite.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ class functionsSuite extends SmolderBaseTest {
3030
.wholeTextFiles(file)
3131
.map(p => TextFile(p._1, p._2)))
3232

33-
val hl7Df = df.select(parse_hl7_message(df("value")).alias("hl7"))
33+
val cleanDF = df.select(regexp_replace(df("value"), "\n", "\r").alias("clean"))
34+
35+
val hl7Df = cleanDF.select(parse_hl7_message(cleanDF("clean")).alias("hl7"))
3436

3537
assert(hl7Df.count() === 1)
3638
assert(hl7Df.selectExpr("explode(hl7.segments)").count() === 3)
@@ -59,7 +61,8 @@ class functionsSuite extends SmolderBaseTest {
5961
.wholeTextFiles(file)
6062
.map(p => TextFile(p._1, p._2)))
6163

62-
val hl7Df = df.select(parse_hl7_message(df("value")).alias("hl7"))
64+
val cleanDF = df.select(regexp_replace(df("value"), "\n", "\r").alias("clean"))
65+
val hl7Df = cleanDF.select(parse_hl7_message(cleanDF("clean")).alias("hl7"))
6366

6467
val evnType = hl7Df.select(segment_field("EVN", 0, col("hl7.segments"))
6568
.alias("type"))

0 commit comments

Comments
 (0)