Skip to content

Commit f7420d7

Browse files
authored
Merge pull request #31 from databricks-industry-solutions/hotfix-large-files
Hotfix large files
2 parents abe5a4d + c4b466f commit f7420d7

12 files changed

+268
-478
lines changed

00_[PLEASE READ] Contributing to Solution Accelerators.py

Lines changed: 0 additions & 127 deletions
This file was deleted.

00_sample_claims.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
# Databricks notebook source
# MAGIC %md # 837i and 837p

# COMMAND ----------

# DBTITLE 1,Install package
# MAGIC %pip install git+https://github.com/databricks-industry-solutions/x12-edi-parser.git

# COMMAND ----------

# DBTITLE 1,Read in sample 837 data
from databricksx12 import *
from databricksx12.hls import *
import json, os
from pyspark.sql.functions import input_file_name

# hm manages the parsing of the different X12 EDI formats (837i/837p/835)
hm = HealthcareManager()

# Read each sample 837 file as a single record (wholetext=True keeps one file per row)
df = spark.read.text("file:///" + os.getcwd() + "/sampledata/837/*txt", wholetext=True)

# Parse each file into EDI objects, then flatten to one element per claim,
# carrying the source filename through for lineage.
rdd = (
    df.withColumn("filename", input_file_name()).rdd
    .map(lambda row: (row.filename, EDI(row.value)))
    .map(lambda edi: hm.flatten(edi[1], filename=edi[0]))
    .flatMap(lambda x: x)
)

# Convert each flattened claim to a JSON string so Spark can infer a schema
claims_rdd = (
    rdd.repartition(4)
    .map(lambda x: hm.flatten_to_json(x))
    .map(lambda x: json.dumps(x))
)
claims = spark.read.json(claims_rdd)

# COMMAND ----------

# DBTITLE 1,Save as a view
claims.createOrReplaceTempView("stg_claims")

# COMMAND ----------

# MAGIC %sql
# MAGIC select * from stg_claims

# COMMAND ----------

# DBTITLE 1,Create Claim Header
# MAGIC %sql
# MAGIC drop table if exists claim_header;
# MAGIC create table claim_header as
# MAGIC select * except(claim_lines)
# MAGIC from stg_claims
# MAGIC ;
# MAGIC
# MAGIC SELECT * FROM claim_header

# COMMAND ----------

# DBTITLE 1,Create Claim Line
# MAGIC %sql
# MAGIC drop table if exists claim_line;
# MAGIC create table claim_line as
# MAGIC select * except(claim_header, claim_lines)
# MAGIC from (
# MAGIC select *, explode(claim_lines) as claim_line
# MAGIC from stg_claims
# MAGIC )
# MAGIC ;
# MAGIC
# MAGIC SELECT * FROM claim_line

# COMMAND ----------

# MAGIC %md # 835

# COMMAND ----------

# DBTITLE 1,Read in sample 835 data
from databricksx12 import *
from databricksx12.hls import *
import json, os
from pyspark.sql.functions import input_file_name

hm = HealthcareManager()
# FIX: the original line had a duplicated, unclosed call
#   df = spark.read.text(df = spark.read.text("file:///" + ... , wholetext = True)
# which is a syntax error; restored to the single read, mirroring the 837 section.
df = spark.read.text("file:///" + os.getcwd() + "/sampledata/835/*txt", wholetext=True)

rdd = (
    df.withColumn("filename", input_file_name()).rdd
    # strict_transactions=False ignores an incorrect SE01 value
    # (SE01 should be set to the number of segments in the transaction)
    .map(lambda row: (row.filename, EDI(row.value, strict_transactions=False)))
    .map(lambda edi: hm.flatten(edi[1], filename=edi[0]))
    .flatMap(lambda x: x)
)

claims_rdd = (
    rdd.repartition(4)
    .map(lambda x: hm.flatten_to_json(x))
    .map(lambda x: json.dumps(x))
)

claims = spark.read.json(claims_rdd)

# COMMAND ----------

# DBTITLE 1,Save as a view
claims.createOrReplaceTempView("stg_remittance")

# COMMAND ----------

# DBTITLE 1,Create Remittance
# MAGIC %sql
# MAGIC drop table if exists remittance;
# MAGIC CREATE TABLE remittance
# MAGIC as
# MAGIC select *
# MAGIC from stg_remittance
# MAGIC ;
# MAGIC
# MAGIC SELECT * FROM remittance;

0 commit comments

Comments
 (0)