|
| 1 | +import 'dart:async'; |
| 2 | + |
| 3 | +import 'package:args/args.dart'; |
| 4 | +import 'package:dio/dio.dart'; |
| 5 | +import 'package:html/dom.dart' hide Comment; |
| 6 | +import 'package:html/parser.dart'; |
| 7 | +import 'package:html_unescape/html_unescape.dart'; |
| 8 | + |
/// Verifies the Hacker News comment-text parser against a known comment.
///
/// Downloads the HTML of a fixed Hacker News thread, runs
/// [parseCommentTextHtml] on the first comment element and compares the result
/// with the expected text. On a mismatch an issue is created on the
/// `livinglist/hacki` GitHub repository, unless the repository's issue listing
/// already contains the issue title.
///
/// [arguments] must contain the GitHub token as the first positional
/// argument. The `--github-token`/`-t` flag is accepted but carries no value
/// itself.
///
/// Throws an [ArgumentError] if no token was supplied and an [Exception] if
/// no comment element was found in the downloaded page.
Future<void> main(List<String> arguments) async {
  // Get the GitHub token from args so that we can create issues if anything
  // doesn't go as expected. `github-token` is declared as a flag, so the
  // token itself arrives as the first positional ("rest") argument.
  final ArgParser parser = ArgParser()
    ..addFlag('github-token', negatable: false, abbr: 't');
  final ArgResults argResults = parser.parse(arguments);
  if (argResults.rest.isEmpty) {
    // Fail with an actionable message instead of the bare "No element"
    // StateError that `rest.first` would raise.
    throw ArgumentError('Missing GitHub token. Usage: --github-token <token>');
  }
  final String token = argResults.rest.first;

  // The expected parser result.
  const String text = '''
What does it say about the world we live in where blogs do more basic journalism than CNN? All that one would have had to do is read the report actually provided.

I don't think I'm being too extreme when I say that, apart from maybe PBS, there is no reputable source of news in America. If you don't believe me, pick a random story, watch it as it gets rewritten a million times through Reuters, then check back on the facts of the story one year later. A news story gets twisted to promote some narrative that will sell papers, and when the facts of the story are finally verified (usually not by the news themselves, but lawyers or courts or whoever), the story is dropped and never reported on again.

Again, if the only thing a reporter had to do was read the report to find the facts of the case to verify what is and isn't true, what the fuck is even the point of a news agency?''';

  // Get HTML of the thread.
  const String itemBaseUrl = 'https://news.ycombinator.com/item?id=';
  const Map<String, String> headers = <String, String>{
    'accept': '*/*',
    'user-agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 17_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1',
  };
  const int itemId = 11536543;
  final Dio dio = Dio();

  try {
    final Uri url = Uri.parse('$itemBaseUrl$itemId');
    final Options option =
        Options(headers: headers, persistentConnection: true);
    final Response<String> response =
        await dio.getUri<String>(url, options: option);

    // Parse the HTML and select all the comment elements.
    final String data = response.data ?? '';
    final Document document = parse(data);
    const String athingComtrSelector =
        '#hnmain > tbody > tr > td > table > tbody > .athing.comtr';
    final List<Element> elements =
        document.querySelectorAll(athingComtrSelector);

    if (elements.isEmpty) {
      throw Exception('No comment from Hacker News.');
    }

    // Verify comment text parser using the first comment element. A missing
    // text element is parsed as an empty string and therefore reported as a
    // mismatch below.
    const String commentTextSelector =
        '''td > table > tbody > tr > td.default > div.comment > div.commtext''';
    final Element? cmtTextElement =
        elements.first.querySelector(commentTextSelector);
    final String parsedText =
        await parseCommentTextHtml(cmtTextElement?.innerHtml ?? '');

    if (parsedText == text) {
      return;
    }

    final Uri issuesUrl =
        Uri.parse('https://api.github.com/repos/livinglist/hacki/issues');
    const String issueTitle = 'Parser check failed.';
    final Map<String, String> githubAuthHeaders = <String, String>{
      'Authorization': 'Bearer $token',
      'X-GitHub-Api-Version': '2022-11-28',
    };

    // Check if an issue with same title already exists. The lookup is
    // authenticated so that it is not subject to the anonymous rate limit.
    final Response<String> issuesResponse = await dio.getUri<String>(
      issuesUrl,
      options: Options(headers: githubAuthHeaders),
    );
    if (issuesResponse.data?.contains(issueTitle) ?? false) {
      print('Issue already exists.');
      return;
    }

    // Renders a value as a single Markdown table cell: pipes are escaped so
    // they do not split the cell, and newlines become `<br>`.
    String toTableCell(String value) =>
        value.replaceAll('|', r'\|').replaceAll('\n', '<br>');

    // Create the issue if one does not exist.
    final Map<String, String> githubHeaders = <String, String>{
      ...githubAuthHeaders,
      'Content-Type': 'application/json',
    };
    final Map<String, dynamic> githubIssuePayload = <String, dynamic>{
      'title': issueTitle,
      'body': '''
| Expected | Actual |
| ------------- | ------------- |
| ${toTableCell(text)} | ${toTableCell(parsedText)} |''',
    };
    await dio.postUri<String>(
      issuesUrl,
      data: githubIssuePayload,
      options: Options(
        headers: githubHeaders,
      ),
    );
  } finally {
    // Release the client's connections on every exit path.
    dio.close();
  }
}
| 92 | + |
/// Converts the inner HTML of a comment into plain text.
///
/// HTML entities in [text] are unescaped first. The markup is then rewritten
/// in this order:
///
/// 1. `<div class="reply">…</div>` blocks are removed.
/// 2. `<span class="…">…</span>` wrappers are replaced by their content.
/// 3. `<p>…</p>` paragraphs are replaced by their content, preceded by two
///    newline characters.
/// 4. `<a href="…">…</a>` links are replaced by the `href` value.
/// 5. `<i>…</i>` runs are wrapped in asterisks.
///
/// Returns the rewritten string with leading and trailing whitespace trimmed.
Future<String> parseCommentTextHtml(String text) async {
  // The first three patterns may span several lines; the link and italic
  // patterns only match within a single line.
  final RegExp replyDiv = RegExp(
    r'\<div class="reply"\>(.*?)\<\/div\>',
    dotAll: true,
  );
  final RegExp classedSpan = RegExp(
    r'\<span class="(.*?)"\>(.*?)\<\/span\>',
    dotAll: true,
  );
  final RegExp paragraph = RegExp(
    r'\<p\>(.*?)\<\/p\>',
    dotAll: true,
  );
  final RegExp anchor = RegExp(r'\<a href=\"(.*?)\".*?\>.*?\<\/a\>');
  final RegExp italic = RegExp(r'\<i\>(.*?)\<\/i\>');

  final String unescaped = HtmlUnescape().convert(text);

  // Drop the reply block entirely.
  final String withoutReply = unescaped.replaceAll(replyDiv, '');

  // Keep only what is inside the span.
  final String withoutSpans = withoutReply.replaceAllMapped(
    classedSpan,
    (Match m) => '${m[2]}',
  );

  // Separate paragraphs with a blank line.
  final String withParagraphBreaks = withoutSpans.replaceAllMapped(
    paragraph,
    (Match m) => '\n\n${m[1]}',
  );

  // A link collapses to its target URL.
  final String withPlainLinks = withParagraphBreaks.replaceAllMapped(
    anchor,
    (Match m) => m[1] ?? '',
  );

  // Italic text is marked with surrounding asterisks.
  final String withItalics = withPlainLinks.replaceAllMapped(
    italic,
    (Match m) => '*${m[1]}*',
  );

  return withItalics.trim();
}
0 commit comments